Exploratory data analysis (EDA) of the ticdata dataset.
#Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.express as px
import requests
import pandas_profiling
from pandas_profiling.utils.cache import cache_file
from sklearn.impute import KNNImputer
import scipy.stats as ss
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 5000)
#Read the three raw files (tab-separated, no header row) into data frames.
#The raw-data directory is named once so it only has to be changed in one place.
DATA_DIR = "/Users/joseabalcaamano/Desktop/MachineLearning/PracticaEDA/data/raw"
df_ticdata = pd.read_csv(DATA_DIR + "/ticdata2000.txt", sep="\t", header=None)
df_ticeval = pd.read_csv(DATA_DIR + "/ticeval2000.txt", sep="\t", header=None)
df_tictgts = pd.read_csv(DATA_DIR + "/tictgts2000.txt", sep="\t", header=None)
#Label the ticdata columns 1..86 (the previous version ran these two statements twice).
headers = list(range(1, 87))
df_ticdata.columns = headers
#Add a new column that tells train from test: ticdata is our train set (flag 1),
#while the flag on tictgts is set to 0.
df_ticdata['87'] = 1
df_tictgts['87'] = 0
#To know the rows and columns of our dataset.
df_ticdata.shape
(5822, 87)
#Check if they are the same rows.
print(len(df_ticdata),
len(df_ticeval),
len(df_tictgts))
5822 4000 4000
#The ticdata dataset has 86 variables, split into categorical and numerical ones; variable number 86, CARAVAN,
#is our target variable. The 87th name in the list, TRAIN, labels the train/test flag column added to the data frame.
list_table = ["MOSTYPE", "MAANTHUI", "MGEMOMV",
"MGEMLEEF", "MOSHOOFD", "MGODRK",
"MGODPR", "MGODOV", "MGODGE",
"MRELGE", "MRELSA", "MRELOV", "MFALLEEN",
"MFGEKIND", "MFWEKIND", "MOPLHOOG", "MOPLMIDD",
"MOPLLAAG", "MBERHOOG", "MBERZELF",
"MBERBOER", "MBERMIDD", "MBERARBG", "MBERARBO",
"MSKA","MSKB1", "MSKB2", "MSKC",
"MSKD", "MHHUUR", "MHKOOP", "MAUT1", "MAUT2",
"MAUT0", "MZFONDS", "MZPART", "MINKM30",
"MINK3045", "MINK4575", "MINK7512", "MINK123M",
"MINKGEM", "MKOOPKLA", "PWAPART", "PWABEDR", "PWALAND",
"PPERSAUT", "PBESAUT", "PMOTSCO", "PVRAAUT",
"PAANHANG", "PTRACTOR", "PWERKT", "PBROM",
"PLEVEN", "PPERSONG", "PGEZONG", "PWAOREG",
"PBRAND", "PZEILPL", "PPLEZIER", "PFIETS","PINBOED", "PBYSTAND","AWAPART","AWABEDR",
"AWALAND","APERSAUT","ABESAUT","AMOTSCO","AVRAAUT","AAANHANG","ATRACTOR",
"AWERKT","ABROM","ALEVEN","APERSONG","AGEZONG","AWAOREG","ABRAND","AZEILPL","APLEZIER",
"AFIETS", "AINBOED", "ABYSTAND", "CARAVAN", "TRAIN"]
list_other_var = ['title']
list_tictgts = ["CARAVAN"]
len(list_table)
87
list_tictgts = ["CARAVAN", "TRAIN"]
list_var = ['title']
len(list_tictgts)
2
df_ticdata.columns= list_table
df_tictgts.columns = list_tictgts
df_ticeval.columns = list_table[:-2]
df_ticdata
| MOSTYPE | MAANTHUI | MGEMOMV | MGEMLEEF | MOSHOOFD | MGODRK | MGODPR | MGODOV | MGODGE | MRELGE | MRELSA | MRELOV | MFALLEEN | MFGEKIND | MFWEKIND | MOPLHOOG | MOPLMIDD | MOPLLAAG | MBERHOOG | MBERZELF | MBERBOER | MBERMIDD | MBERARBG | MBERARBO | MSKA | MSKB1 | MSKB2 | MSKC | MSKD | MHHUUR | MHKOOP | MAUT1 | MAUT2 | MAUT0 | MZFONDS | MZPART | MINKM30 | MINK3045 | MINK4575 | MINK7512 | MINK123M | MINKGEM | MKOOPKLA | PWAPART | PWABEDR | PWALAND | PPERSAUT | PBESAUT | PMOTSCO | PVRAAUT | PAANHANG | PTRACTOR | PWERKT | PBROM | PLEVEN | PPERSONG | PGEZONG | PWAOREG | PBRAND | PZEILPL | PPLEZIER | PFIETS | PINBOED | PBYSTAND | AWAPART | AWABEDR | AWALAND | APERSAUT | ABESAUT | AMOTSCO | AVRAAUT | AAANHANG | ATRACTOR | AWERKT | ABROM | ALEVEN | APERSONG | AGEZONG | AWAOREG | ABRAND | AZEILPL | APLEZIER | AFIETS | AINBOED | ABYSTAND | CARAVAN | TRAIN | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 33 | 1 | 3 | 2 | 8 | 0 | 5 | 1 | 3 | 7 | 0 | 2 | 1 | 2 | 6 | 1 | 2 | 7 | 1 | 0 | 1 | 2 | 5 | 2 | 1 | 1 | 2 | 6 | 1 | 1 | 8 | 8 | 0 | 1 | 8 | 1 | 0 | 4 | 5 | 0 | 0 | 4 | 3 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 1 | 37 | 1 | 2 | 2 | 8 | 1 | 4 | 1 | 4 | 6 | 2 | 2 | 0 | 4 | 5 | 0 | 5 | 4 | 0 | 0 | 0 | 5 | 0 | 4 | 0 | 2 | 3 | 5 | 0 | 2 | 7 | 7 | 1 | 2 | 6 | 3 | 2 | 0 | 5 | 2 | 0 | 5 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 2 | 37 | 1 | 2 | 2 | 8 | 0 | 4 | 2 | 4 | 3 | 2 | 4 | 4 | 4 | 2 | 0 | 5 | 4 | 0 | 0 | 0 | 7 | 0 | 2 | 0 | 5 | 0 | 4 | 0 | 7 | 2 | 7 | 0 | 2 | 9 | 0 | 4 | 5 | 0 | 0 | 0 | 3 | 4 | 2 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3 | 9 | 1 | 3 | 3 | 3 | 2 | 3 | 2 | 4 | 5 | 2 | 2 | 2 | 3 | 4 | 3 | 4 | 2 | 4 | 0 | 0 | 3 | 1 | 2 | 3 | 2 | 1 | 4 | 0 | 5 | 4 | 9 | 0 | 0 | 7 | 2 | 1 | 5 | 3 | 0 | 0 | 4 | 4 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 4 | 40 | 1 | 4 | 2 | 10 | 1 | 4 | 1 | 4 | 7 | 1 | 2 | 2 | 4 | 4 | 5 | 4 | 0 | 0 | 5 | 4 | 0 | 0 | 0 | 9 | 0 | 0 | 0 | 0 | 4 | 5 | 6 | 2 | 1 | 5 | 4 | 0 | 0 | 9 | 0 | 0 | 6 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5817 | 36 | 1 | 1 | 2 | 8 | 0 | 6 | 1 | 2 | 1 | 2 | 6 | 5 | 3 | 2 | 2 | 5 | 2 | 2 | 0 | 0 | 4 | 1 | 3 | 2 | 3 | 3 | 3 | 0 | 9 | 0 | 5 | 1 | 3 | 5 | 4 | 4 | 3 | 3 | 0 | 0 | 3 | 3 | 2 | 0 | 0 | 6 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 5818 | 35 | 1 | 4 | 4 | 8 | 1 | 4 | 1 | 4 | 6 | 0 | 3 | 2 | 2 | 5 | 0 | 0 | 9 | 2 | 1 | 1 | 3 | 3 | 2 | 0 | 4 | 5 | 0 | 0 | 3 | 6 | 6 | 1 | 2 | 6 | 3 | 0 | 9 | 0 | 0 | 0 | 4 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 5819 | 33 | 1 | 3 | 4 | 8 | 0 | 6 | 0 | 3 | 5 | 1 | 4 | 3 | 3 | 4 | 0 | 1 | 8 | 1 | 0 | 0 | 2 | 3 | 5 | 1 | 1 | 1 | 4 | 4 | 7 | 2 | 4 | 0 | 5 | 8 | 1 | 5 | 3 | 1 | 1 | 0 | 3 | 3 | 2 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| 5820 | 34 | 1 | 3 | 2 | 8 | 0 | 7 | 0 | 2 | 7 | 2 | 0 | 0 | 4 | 5 | 0 | 2 | 7 | 0 | 2 | 0 | 2 | 4 | 2 | 0 | 0 | 4 | 5 | 0 | 2 | 7 | 5 | 4 | 0 | 9 | 0 | 0 | 5 | 4 | 0 | 0 | 4 | 6 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 5821 | 33 | 1 | 3 | 3 | 8 | 0 | 6 | 1 | 2 | 7 | 1 | 2 | 1 | 4 | 4 | 1 | 2 | 6 | 1 | 0 | 1 | 3 | 2 | 4 | 1 | 1 | 2 | 6 | 1 | 5 | 4 | 5 | 2 | 3 | 6 | 3 | 2 | 5 | 2 | 1 | 0 | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
5822 rows × 87 columns
print(len(df_ticdata.columns),
len(df_ticeval.columns),
len(df_tictgts.columns))
87 85 2
#Append the tictgts columns (CARAVAN and TRAIN) to ticeval, matching rows on the index.
df_ticeval = pd.concat([df_ticeval, df_tictgts], axis=1, join="inner")
#Reset the index of ticdata to 0..n-1; drop=True discards the old index directly
#instead of first turning it into an 'index' column and then dropping that column.
df_ticdata = df_ticdata.reset_index(drop=True)
df_ticdata
| MOSTYPE | MAANTHUI | MGEMOMV | MGEMLEEF | MOSHOOFD | MGODRK | MGODPR | MGODOV | MGODGE | MRELGE | MRELSA | MRELOV | MFALLEEN | MFGEKIND | MFWEKIND | MOPLHOOG | MOPLMIDD | MOPLLAAG | MBERHOOG | MBERZELF | MBERBOER | MBERMIDD | MBERARBG | MBERARBO | MSKA | MSKB1 | MSKB2 | MSKC | MSKD | MHHUUR | MHKOOP | MAUT1 | MAUT2 | MAUT0 | MZFONDS | MZPART | MINKM30 | MINK3045 | MINK4575 | MINK7512 | MINK123M | MINKGEM | MKOOPKLA | PWAPART | PWABEDR | PWALAND | PPERSAUT | PBESAUT | PMOTSCO | PVRAAUT | PAANHANG | PTRACTOR | PWERKT | PBROM | PLEVEN | PPERSONG | PGEZONG | PWAOREG | PBRAND | PZEILPL | PPLEZIER | PFIETS | PINBOED | PBYSTAND | AWAPART | AWABEDR | AWALAND | APERSAUT | ABESAUT | AMOTSCO | AVRAAUT | AAANHANG | ATRACTOR | AWERKT | ABROM | ALEVEN | APERSONG | AGEZONG | AWAOREG | ABRAND | AZEILPL | APLEZIER | AFIETS | AINBOED | ABYSTAND | CARAVAN | TRAIN | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 33 | 1 | 3 | 2 | 8 | 0 | 5 | 1 | 3 | 7 | 0 | 2 | 1 | 2 | 6 | 1 | 2 | 7 | 1 | 0 | 1 | 2 | 5 | 2 | 1 | 1 | 2 | 6 | 1 | 1 | 8 | 8 | 0 | 1 | 8 | 1 | 0 | 4 | 5 | 0 | 0 | 4 | 3 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 1 | 37 | 1 | 2 | 2 | 8 | 1 | 4 | 1 | 4 | 6 | 2 | 2 | 0 | 4 | 5 | 0 | 5 | 4 | 0 | 0 | 0 | 5 | 0 | 4 | 0 | 2 | 3 | 5 | 0 | 2 | 7 | 7 | 1 | 2 | 6 | 3 | 2 | 0 | 5 | 2 | 0 | 5 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 2 | 37 | 1 | 2 | 2 | 8 | 0 | 4 | 2 | 4 | 3 | 2 | 4 | 4 | 4 | 2 | 0 | 5 | 4 | 0 | 0 | 0 | 7 | 0 | 2 | 0 | 5 | 0 | 4 | 0 | 7 | 2 | 7 | 0 | 2 | 9 | 0 | 4 | 5 | 0 | 0 | 0 | 3 | 4 | 2 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3 | 9 | 1 | 3 | 3 | 3 | 2 | 3 | 2 | 4 | 5 | 2 | 2 | 2 | 3 | 4 | 3 | 4 | 2 | 4 | 0 | 0 | 3 | 1 | 2 | 3 | 2 | 1 | 4 | 0 | 5 | 4 | 9 | 0 | 0 | 7 | 2 | 1 | 5 | 3 | 0 | 0 | 4 | 4 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 4 | 40 | 1 | 4 | 2 | 10 | 1 | 4 | 1 | 4 | 7 | 1 | 2 | 2 | 4 | 4 | 5 | 4 | 0 | 0 | 5 | 4 | 0 | 0 | 0 | 9 | 0 | 0 | 0 | 0 | 4 | 5 | 6 | 2 | 1 | 5 | 4 | 0 | 0 | 9 | 0 | 0 | 6 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5817 | 36 | 1 | 1 | 2 | 8 | 0 | 6 | 1 | 2 | 1 | 2 | 6 | 5 | 3 | 2 | 2 | 5 | 2 | 2 | 0 | 0 | 4 | 1 | 3 | 2 | 3 | 3 | 3 | 0 | 9 | 0 | 5 | 1 | 3 | 5 | 4 | 4 | 3 | 3 | 0 | 0 | 3 | 3 | 2 | 0 | 0 | 6 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 5818 | 35 | 1 | 4 | 4 | 8 | 1 | 4 | 1 | 4 | 6 | 0 | 3 | 2 | 2 | 5 | 0 | 0 | 9 | 2 | 1 | 1 | 3 | 3 | 2 | 0 | 4 | 5 | 0 | 0 | 3 | 6 | 6 | 1 | 2 | 6 | 3 | 0 | 9 | 0 | 0 | 0 | 4 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 5819 | 33 | 1 | 3 | 4 | 8 | 0 | 6 | 0 | 3 | 5 | 1 | 4 | 3 | 3 | 4 | 0 | 1 | 8 | 1 | 0 | 0 | 2 | 3 | 5 | 1 | 1 | 1 | 4 | 4 | 7 | 2 | 4 | 0 | 5 | 8 | 1 | 5 | 3 | 1 | 1 | 0 | 3 | 3 | 2 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| 5820 | 34 | 1 | 3 | 2 | 8 | 0 | 7 | 0 | 2 | 7 | 2 | 0 | 0 | 4 | 5 | 0 | 2 | 7 | 0 | 2 | 0 | 2 | 4 | 2 | 0 | 0 | 4 | 5 | 0 | 2 | 7 | 5 | 4 | 0 | 9 | 0 | 0 | 5 | 4 | 0 | 0 | 4 | 6 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 5821 | 33 | 1 | 3 | 3 | 8 | 0 | 6 | 1 | 2 | 7 | 1 | 2 | 1 | 4 | 4 | 1 | 2 | 6 | 1 | 0 | 1 | 3 | 2 | 4 | 1 | 1 | 2 | 6 | 1 | 5 | 4 | 5 | 2 | 3 | 6 | 3 | 2 | 5 | 2 | 1 | 0 | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
5822 rows × 87 columns
#Check for duplicated rows in ticdata by comparing its shape before and after dropping duplicates.
print(df_ticdata.shape, df_ticdata.drop_duplicates().shape)
(5822, 87) (5220, 87)
#Missing values
pd.isnull(df_ticdata[[x for x in df_ticdata.columns[pd.isna(df_ticdata).any()].tolist()]]).sum()
Series([], dtype: float64)
#Inspect the data type of every variable.
df_ticdata.dtypes.to_dict()
{'MOSTYPE': dtype('int64'),
'MAANTHUI': dtype('int64'),
'MGEMOMV': dtype('int64'),
'MGEMLEEF': dtype('int64'),
'MOSHOOFD': dtype('int64'),
'MGODRK': dtype('int64'),
'MGODPR': dtype('int64'),
'MGODOV': dtype('int64'),
'MGODGE': dtype('int64'),
'MRELGE': dtype('int64'),
'MRELSA': dtype('int64'),
'MRELOV': dtype('int64'),
'MFALLEEN': dtype('int64'),
'MFGEKIND': dtype('int64'),
'MFWEKIND': dtype('int64'),
'MOPLHOOG': dtype('int64'),
'MOPLMIDD': dtype('int64'),
'MOPLLAAG': dtype('int64'),
'MBERHOOG': dtype('int64'),
'MBERZELF': dtype('int64'),
'MBERBOER': dtype('int64'),
'MBERMIDD': dtype('int64'),
'MBERARBG': dtype('int64'),
'MBERARBO': dtype('int64'),
'MSKA': dtype('int64'),
'MSKB1': dtype('int64'),
'MSKB2': dtype('int64'),
'MSKC': dtype('int64'),
'MSKD': dtype('int64'),
'MHHUUR': dtype('int64'),
'MHKOOP': dtype('int64'),
'MAUT1': dtype('int64'),
'MAUT2': dtype('int64'),
'MAUT0': dtype('int64'),
'MZFONDS': dtype('int64'),
'MZPART': dtype('int64'),
'MINKM30': dtype('int64'),
'MINK3045': dtype('int64'),
'MINK4575': dtype('int64'),
'MINK7512': dtype('int64'),
'MINK123M': dtype('int64'),
'MINKGEM': dtype('int64'),
'MKOOPKLA': dtype('int64'),
'PWAPART': dtype('int64'),
'PWABEDR': dtype('int64'),
'PWALAND': dtype('int64'),
'PPERSAUT': dtype('int64'),
'PBESAUT': dtype('int64'),
'PMOTSCO': dtype('int64'),
'PVRAAUT': dtype('int64'),
'PAANHANG': dtype('int64'),
'PTRACTOR': dtype('int64'),
'PWERKT': dtype('int64'),
'PBROM': dtype('int64'),
'PLEVEN': dtype('int64'),
'PPERSONG': dtype('int64'),
'PGEZONG': dtype('int64'),
'PWAOREG': dtype('int64'),
'PBRAND': dtype('int64'),
'PZEILPL': dtype('int64'),
'PPLEZIER': dtype('int64'),
'PFIETS': dtype('int64'),
'PINBOED': dtype('int64'),
'PBYSTAND': dtype('int64'),
'AWAPART': dtype('int64'),
'AWABEDR': dtype('int64'),
'AWALAND': dtype('int64'),
'APERSAUT': dtype('int64'),
'ABESAUT': dtype('int64'),
'AMOTSCO': dtype('int64'),
'AVRAAUT': dtype('int64'),
'AAANHANG': dtype('int64'),
'ATRACTOR': dtype('int64'),
'AWERKT': dtype('int64'),
'ABROM': dtype('int64'),
'ALEVEN': dtype('int64'),
'APERSONG': dtype('int64'),
'AGEZONG': dtype('int64'),
'AWAOREG': dtype('int64'),
'ABRAND': dtype('int64'),
'AZEILPL': dtype('int64'),
'APLEZIER': dtype('int64'),
'AFIETS': dtype('int64'),
'AINBOED': dtype('int64'),
'ABYSTAND': dtype('int64'),
'CARAVAN': dtype('int64'),
'TRAIN': dtype('int64')}
#Take the target variable, 'CARAVAN' (caravan insurance), and count how many
#customers do not have the insurance and how many do, as a percentage and as a count.
#rename_axis('index') fixes the name of the class column produced by reset_index();
#without it the name depends on the pandas version (pandas >= 2.0 no longer calls it
#'index'), and the merge on 'index' below would fail.
df_data_caravan = (
    df_ticdata['CARAVAN']
    .value_counts(normalize=True)
    .mul(100)
    .rename('percent')
    .rename_axis('index')
    .reset_index()
)
#rename('CARAVAN') likewise fixes the name of the count column (pandas >= 2.0 names it 'count').
df_data_caravan_conteo = (
    df_ticdata['CARAVAN']
    .value_counts()
    .rename('CARAVAN')
    .rename_axis('index')
    .reset_index()
)
df_data_caravan_pc = pd.merge(df_data_caravan, df_data_caravan_conteo, on=['index'], how='inner')
df_data_caravan_pc
| index | percent | CARAVAN | |
|---|---|---|---|
| 0 | 0 | 94.022673 | 5474 |
| 1 | 1 | 5.977327 | 348 |
#Plot the percentage of customers without (0) and with (1) a caravan insurance,
#using the 'index' (class) and 'percent' columns of the summary table.
fig = px.histogram(df_data_caravan_pc, x="index", y=['percent'])
fig.show()
We can see that 94.02% of the customers do not have a caravan insurance, versus 5.98% who do have it.
#Keep only the customers who do not have a caravan insurance (CARAVAN == 0) in a
#separate data frame, and display its (rows, columns) shape.
df_ticdata_caravan = df_ticdata[(df_ticdata['CARAVAN']== 0)]
df_ticdata_caravan.shape
(5474, 87)
#Check whether the data frame contains null values, counted per column and per row.
null_flags = df_ticdata.isnull()
total_rows, total_cols = df_ticdata.shape
df_data_null_columns = null_flags.sum(axis=0).sort_values(ascending=False)
df_data_null_rows = null_flags.sum(axis=1).sort_values(ascending=False)
print(df_data_null_columns.shape, df_data_null_rows.shape)
#Per column: number of nulls and the fraction of rows that are null.
df_null_columnas = df_data_null_columns.to_frame(name='nulls_columns')
df_null_columnas['percent_columns'] = df_null_columnas['nulls_columns'].div(total_rows)
#Per row: number of nulls and the fraction of columns that are null.
df_null_filas = df_data_null_rows.to_frame(name='nulls_rows')
df_null_filas['percent_rows'] = df_null_filas['nulls_rows'].div(total_cols)
(87,) (5822,)
#There are no null values.
df_null_columnas
| nulls_columns | percent_columns | |
|---|---|---|
| MOSTYPE | 0 | 0.0 |
| PPERSONG | 0 | 0.0 |
| PBYSTAND | 0 | 0.0 |
| PINBOED | 0 | 0.0 |
| PFIETS | 0 | 0.0 |
| PPLEZIER | 0 | 0.0 |
| PZEILPL | 0 | 0.0 |
| PBRAND | 0 | 0.0 |
| PWAOREG | 0 | 0.0 |
| PGEZONG | 0 | 0.0 |
| PLEVEN | 0 | 0.0 |
| AWABEDR | 0 | 0.0 |
| PBROM | 0 | 0.0 |
| PWERKT | 0 | 0.0 |
| PTRACTOR | 0 | 0.0 |
| PAANHANG | 0 | 0.0 |
| PVRAAUT | 0 | 0.0 |
| PMOTSCO | 0 | 0.0 |
| PBESAUT | 0 | 0.0 |
| PPERSAUT | 0 | 0.0 |
| AWAPART | 0 | 0.0 |
| AWALAND | 0 | 0.0 |
| PWABEDR | 0 | 0.0 |
| AGEZONG | 0 | 0.0 |
| CARAVAN | 0 | 0.0 |
| ABYSTAND | 0 | 0.0 |
| AINBOED | 0 | 0.0 |
| AFIETS | 0 | 0.0 |
| APLEZIER | 0 | 0.0 |
| AZEILPL | 0 | 0.0 |
| ABRAND | 0 | 0.0 |
| AWAOREG | 0 | 0.0 |
| APERSONG | 0 | 0.0 |
| APERSAUT | 0 | 0.0 |
| ALEVEN | 0 | 0.0 |
| ABROM | 0 | 0.0 |
| AWERKT | 0 | 0.0 |
| ATRACTOR | 0 | 0.0 |
| AAANHANG | 0 | 0.0 |
| AVRAAUT | 0 | 0.0 |
| AMOTSCO | 0 | 0.0 |
| ABESAUT | 0 | 0.0 |
| PWALAND | 0 | 0.0 |
| PWAPART | 0 | 0.0 |
| MAANTHUI | 0 | 0.0 |
| MRELOV | 0 | 0.0 |
| MBERZELF | 0 | 0.0 |
| MBERHOOG | 0 | 0.0 |
| MOPLLAAG | 0 | 0.0 |
| MOPLMIDD | 0 | 0.0 |
| MOPLHOOG | 0 | 0.0 |
| MFWEKIND | 0 | 0.0 |
| MFGEKIND | 0 | 0.0 |
| MFALLEEN | 0 | 0.0 |
| MRELSA | 0 | 0.0 |
| MBERMIDD | 0 | 0.0 |
| MRELGE | 0 | 0.0 |
| MGODGE | 0 | 0.0 |
| MGODOV | 0 | 0.0 |
| MGODPR | 0 | 0.0 |
| MGODRK | 0 | 0.0 |
| MOSHOOFD | 0 | 0.0 |
| MGEMLEEF | 0 | 0.0 |
| MGEMOMV | 0 | 0.0 |
| MBERBOER | 0 | 0.0 |
| MBERARBG | 0 | 0.0 |
| MKOOPKLA | 0 | 0.0 |
| MAUT0 | 0 | 0.0 |
| MINKGEM | 0 | 0.0 |
| MINK123M | 0 | 0.0 |
| MINK7512 | 0 | 0.0 |
| MINK4575 | 0 | 0.0 |
| MINK3045 | 0 | 0.0 |
| MINKM30 | 0 | 0.0 |
| MZPART | 0 | 0.0 |
| MZFONDS | 0 | 0.0 |
| MAUT2 | 0 | 0.0 |
| MBERARBO | 0 | 0.0 |
| MAUT1 | 0 | 0.0 |
| MHKOOP | 0 | 0.0 |
| MHHUUR | 0 | 0.0 |
| MSKD | 0 | 0.0 |
| MSKC | 0 | 0.0 |
| MSKB2 | 0 | 0.0 |
| MSKB1 | 0 | 0.0 |
| MSKA | 0 | 0.0 |
| TRAIN | 0 | 0.0 |
#The column 'MGEMLEEF' is a categorical variable that represents the age. We turn it
#into a numerical variable by replacing each category code (1-6) with the mean value
#of its class; any value that is not one of the six codes is left unchanged.
age_by_code = {1: 25, 2: 35, 3: 45, 4: 55, 5: 65, 6: 75}
df_ticdata['MGEMLEEF'] = df_ticdata['MGEMLEEF'].replace(age_by_code)
[df_ticdata['MGEMLEEF']]
[0 35
1 35
2 35
3 45
4 35
..
5817 35
5818 55
5819 55
5820 35
5821 45
Name: MGEMLEEF, Length: 5822, dtype: int64]
Dataframe with only numerical variables
#Compare the distribution of the target variable in train and test.
#rename_axis('index') and rename('CARAVAN') fix the column names produced by
#reset_index(); without them the names depend on the pandas version (pandas >= 2.0
#yields no 'index' column), and the merges on 'index' would fail.
#train
plot_df_ticdata = (
    df_ticdata['CARAVAN']
    .value_counts(normalize=True)
    .mul(100)
    .rename('percent')
    .rename_axis('index')
    .reset_index()
)
plot_df_ticdata_conteo = (
    df_ticdata['CARAVAN']
    .value_counts()
    .rename('CARAVAN')
    .rename_axis('index')
    .reset_index()
)
plot_df_ticdata_pc = pd.merge(plot_df_ticdata, plot_df_ticdata_conteo, on=['index'], how='inner')
#test
plot_df_ticeval = (
    df_ticeval['CARAVAN']
    .value_counts(normalize=True)
    .mul(100)
    .rename('percent')
    .rename_axis('index')
    .reset_index()
)
plot_df_ticeval_conteo = (
    df_ticeval['CARAVAN']
    .value_counts()
    .rename('CARAVAN')
    .rename_axis('index')
    .reset_index()
)
plot_df_ticeval_pc = pd.merge(plot_df_ticeval, plot_df_ticeval_conteo, on=['index'], how='inner')
print(plot_df_ticdata_pc)
print(plot_df_ticeval_pc)
index percent CARAVAN 0 0 94.022673 5474 1 1 5.977327 348 index percent CARAVAN 0 0 94.05 3762 1 1 5.95 238
#Plot the percentage of each CARAVAN class for train and for test, one figure each,
#so that the two distributions can be compared.
fig_train = px.histogram(plot_df_ticdata_pc, x="index", y=['percent'])
fig_train.show()
fig_test = px.histogram(plot_df_ticeval_pc, x="index", y=['percent'])
fig_test.show()
#Create a data frame with the variables treated as categorical (the M* and P* columns
#listed below), plus the target CARAVAN, and display its first rows.
df_ticdata_categorical_variables = df_ticdata [["MOSTYPE", "MOSHOOFD", "MGODRK",
"MGODPR", "MGODOV", "MGODGE",
"MRELGE", "MRELSA", "MRELOV", "MFALLEEN",
"MFGEKIND", "MFWEKIND", "MOPLHOOG", "MOPLMIDD",
"MOPLLAAG", "MBERHOOG", "MBERZELF",
"MBERBOER", "MBERMIDD", "MBERARBG", "MBERARBO",
"MSKA","MSKB1", "MSKB2", "MSKC",
"MSKD", "MHHUUR", "MHKOOP", "MAUT1", "MAUT2",
"MAUT0", "MZFONDS", "MZPART", "MINKM30",
"MINK3045", "MINK4575", "MINK7512", "MINK123M",
"MINKGEM", "MKOOPKLA", "PWAPART", "PWABEDR", "PWALAND",
"PPERSAUT", "PBESAUT", "PMOTSCO", "PVRAAUT",
"PAANHANG", "PTRACTOR", "PWERKT", "PBROM",
"PLEVEN", "PPERSONG", "PGEZONG", "PWAOREG",
"PBRAND", "PZEILPL", "PPLEZIER", "PFIETS","PINBOED", "PBYSTAND","CARAVAN"]]
df_ticdata_categorical_variables.head()
| MOSTYPE | MOSHOOFD | MGODRK | MGODPR | MGODOV | MGODGE | MRELGE | MRELSA | MRELOV | MFALLEEN | MFGEKIND | MFWEKIND | MOPLHOOG | MOPLMIDD | MOPLLAAG | MBERHOOG | MBERZELF | MBERBOER | MBERMIDD | MBERARBG | MBERARBO | MSKA | MSKB1 | MSKB2 | MSKC | MSKD | MHHUUR | MHKOOP | MAUT1 | MAUT2 | MAUT0 | MZFONDS | MZPART | MINKM30 | MINK3045 | MINK4575 | MINK7512 | MINK123M | MINKGEM | MKOOPKLA | PWAPART | PWABEDR | PWALAND | PPERSAUT | PBESAUT | PMOTSCO | PVRAAUT | PAANHANG | PTRACTOR | PWERKT | PBROM | PLEVEN | PPERSONG | PGEZONG | PWAOREG | PBRAND | PZEILPL | PPLEZIER | PFIETS | PINBOED | PBYSTAND | CARAVAN | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 33 | 8 | 0 | 5 | 1 | 3 | 7 | 0 | 2 | 1 | 2 | 6 | 1 | 2 | 7 | 1 | 0 | 1 | 2 | 5 | 2 | 1 | 1 | 2 | 6 | 1 | 1 | 8 | 8 | 0 | 1 | 8 | 1 | 0 | 4 | 5 | 0 | 0 | 4 | 3 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 37 | 8 | 1 | 4 | 1 | 4 | 6 | 2 | 2 | 0 | 4 | 5 | 0 | 5 | 4 | 0 | 0 | 0 | 5 | 0 | 4 | 0 | 2 | 3 | 5 | 0 | 2 | 7 | 7 | 1 | 2 | 6 | 3 | 2 | 0 | 5 | 2 | 0 | 5 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 37 | 8 | 0 | 4 | 2 | 4 | 3 | 2 | 4 | 4 | 4 | 2 | 0 | 5 | 4 | 0 | 0 | 0 | 7 | 0 | 2 | 0 | 5 | 0 | 4 | 0 | 7 | 2 | 7 | 0 | 2 | 9 | 0 | 4 | 5 | 0 | 0 | 0 | 3 | 4 | 2 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 9 | 3 | 2 | 3 | 2 | 4 | 5 | 2 | 2 | 2 | 3 | 4 | 3 | 4 | 2 | 4 | 0 | 0 | 3 | 1 | 2 | 3 | 2 | 1 | 4 | 0 | 5 | 4 | 9 | 0 | 0 | 7 | 2 | 1 | 5 | 3 | 0 | 0 | 4 | 4 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 40 | 10 | 1 | 4 | 1 | 4 | 7 | 1 | 2 | 2 | 4 | 4 | 5 | 4 | 0 | 0 | 5 | 4 | 0 | 0 | 0 | 9 | 0 | 0 | 0 | 0 | 4 | 5 | 6 | 2 | 1 | 5 | 4 | 0 | 0 | 9 | 0 | 0 | 6 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 |
#Create a data frame with the variables treated as numerical (MAANTHUI, MGEMOMV, the
#recoded MGEMLEEF and the A* columns), plus the target CARAVAN, and display its first rows.
df_ticdata_numerical_variables = df_ticdata[["MAANTHUI", "MGEMOMV", "MGEMLEEF","AWAPART","AWABEDR", "AWALAND","APERSAUT","ABESAUT","AMOTSCO",
"AVRAAUT","AAANHANG","ATRACTOR","AWERKT","ABROM","ALEVEN","APERSONG","AGEZONG",
"AWAOREG","ABRAND","AZEILPL","APLEZIER", "AFIETS", "AINBOED", "ABYSTAND", "CARAVAN"]]
df_ticdata_numerical_variables.head()
| MAANTHUI | MGEMOMV | MGEMLEEF | AWAPART | AWABEDR | AWALAND | APERSAUT | ABESAUT | AMOTSCO | AVRAAUT | AAANHANG | ATRACTOR | AWERKT | ABROM | ALEVEN | APERSONG | AGEZONG | AWAOREG | ABRAND | AZEILPL | APLEZIER | AFIETS | AINBOED | ABYSTAND | CARAVAN | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 3 | 35 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 1 | 2 | 35 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 1 | 2 | 35 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 1 | 3 | 45 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 1 | 4 | 35 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
def get_corr_matrix(dataset=None, metodo='spearman', size_figure=(10, 8)):
    """Plot a heatmap of the pairwise correlation matrix of ``dataset``.

    Args:
        dataset: Data frame whose columns are correlated with each other via
            ``dataset.corr``. When it is ``None`` a message is printed and
            nothing is plotted.
        metodo: Correlation method handed to ``dataset.corr(method=...)``.
            Spearman is the default; pass e.g. ``'pearson'`` to change it.
        size_figure: ``(width, height)`` handed to ``plt.subplots`` as
            ``figsize``. Lists such as ``[10, 8]`` are still accepted.

    Returns:
        ``1`` when ``dataset`` is ``None``; otherwise ``None`` once the
        figure has been shown.
    """
    if dataset is None:
        print(u'\nNeed to add arguments to the function')
        return 1

    # Applies the seaborn "white" style (a global seaborn setting).
    sns.set(style="white")

    # Compute the correlation matrix.
    corr = dataset.corr(method=metodo)

    # Set self-correlation to zero to avoid distraction: the diagonal would
    # otherwise always sit at the top of the colour scale.
    for i in range(corr.shape[0]):
        corr.iloc[i, i] = 0

    # Set up the matplotlib figure and draw the heatmap explicitly on its axes.
    _, ax = plt.subplots(figsize=size_figure)
    sns.heatmap(corr, center=0, square=True, linewidths=.5,
                cmap='viridis', ax=ax)

    plt.show()
#Correlation matrix of the whole ticdata data frame (default method: spearman).
#In the upper left side we have the correlation of the categorical variables and in the lower right side we have
#the correlation of the policies.
get_corr_matrix(dataset = df_ticdata, size_figure=[10,8])
#Correlation matrix of the categorical variables (default method: spearman).
get_corr_matrix(dataset = df_ticdata_categorical_variables, size_figure=[10,8])
#Correlation matrix of the numerical variables, this time with the pearson method.
get_corr_matrix(dataset = df_ticdata_numerical_variables,
metodo='pearson', size_figure=[10,8])
#We create a data frame with the policies.
df_ticdata_policies = df_ticdata[["PWAPART", "PWABEDR", "PWALAND",
"PPERSAUT", "PBESAUT", "PMOTSCO", "PVRAAUT",
"PAANHANG", "PTRACTOR", "PWERKT", "PBROM",
"PLEVEN", "PPERSONG", "PGEZONG", "PWAOREG",
"PBRAND", "PZEILPL", "PPLEZIER", "PFIETS","PINBOED", "PBYSTAND","AWAPART","AWABEDR",
"AWALAND","APERSAUT","ABESAUT","AMOTSCO","AVRAAUT","AAANHANG","ATRACTOR",
"AWERKT","ABROM","ALEVEN","APERSONG","AGEZONG","AWAOREG","ABRAND","AZEILPL","APLEZIER",
"AFIETS", "AINBOED", "ABYSTAND", "CARAVAN", "TRAIN"]]
get_corr_matrix(dataset = df_ticdata_policies, size_figure=[10,8])
df_ticdata_policies.corr(method='spearman').style.background_gradient(cmap='coolwarm')
| PWAPART | PWABEDR | PWALAND | PPERSAUT | PBESAUT | PMOTSCO | PVRAAUT | PAANHANG | PTRACTOR | PWERKT | PBROM | PLEVEN | PPERSONG | PGEZONG | PWAOREG | PBRAND | PZEILPL | PPLEZIER | PFIETS | PINBOED | PBYSTAND | AWAPART | AWABEDR | AWALAND | APERSAUT | ABESAUT | AMOTSCO | AVRAAUT | AAANHANG | ATRACTOR | AWERKT | ABROM | ALEVEN | APERSONG | AGEZONG | AWAOREG | ABRAND | AZEILPL | APLEZIER | AFIETS | AINBOED | ABYSTAND | CARAVAN | TRAIN | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PWAPART | 1.000000 | -0.047163 | -0.111101 | 0.158245 | -0.040925 | 0.023527 | -0.022796 | -0.019500 | -0.075141 | -0.024934 | -0.153253 | 0.139312 | -0.010215 | 0.056989 | -0.001413 | 0.512962 | 0.013126 | -0.003985 | -0.011382 | 0.042868 | 0.047596 | 0.989286 | -0.047255 | -0.111043 | 0.152571 | -0.040878 | 0.023880 | -0.022800 | -0.019434 | -0.075159 | -0.024928 | -0.152564 | 0.140592 | -0.010157 | 0.056973 | -0.001384 | 0.558860 | 0.013123 | -0.004017 | -0.011138 | 0.042850 | 0.047635 | 0.095332 | nan |
| PWABEDR | -0.047163 | 1.000000 | 0.033714 | -0.011779 | 0.216533 | -0.015888 | 0.144653 | 0.085160 | 0.075760 | 0.115155 | -0.032271 | 0.019661 | -0.008745 | -0.009688 | 0.224897 | 0.081550 | -0.002714 | -0.009024 | -0.019236 | 0.022890 | -0.001808 | -0.049170 | 0.999970 | 0.033807 | -0.019640 | 0.216492 | -0.016128 | 0.144678 | 0.085141 | 0.075443 | 0.115076 | -0.032286 | 0.019100 | -0.008745 | -0.009688 | 0.224926 | 0.046095 | -0.002714 | -0.009024 | -0.019235 | 0.022781 | -0.001887 | 0.000573 | nan |
| PWALAND | -0.111101 | 0.033714 | 1.000000 | 0.079374 | 0.026701 | -0.004038 | -0.005708 | 0.099282 | 0.557020 | 0.152154 | -0.010308 | 0.000528 | 0.039775 | 0.018207 | 0.049273 | 0.206860 | -0.003294 | 0.004915 | -0.015717 | 0.001078 | 0.003103 | -0.111562 | 0.033855 | 0.999944 | 0.084595 | 0.026700 | -0.003740 | -0.005708 | 0.099085 | 0.557292 | 0.152128 | -0.010112 | 0.000111 | 0.039732 | 0.018274 | 0.049310 | 0.130971 | -0.003294 | 0.004987 | -0.015763 | 0.001140 | 0.002974 | -0.021325 | nan |
| PPERSAUT | 0.158245 | -0.011779 | 0.079374 | 1.000000 | 0.019013 | 0.057908 | 0.010823 | 0.045688 | 0.080298 | 0.031518 | -0.176222 | 0.070992 | 0.010389 | 0.046971 | -0.001973 | 0.113067 | -0.006312 | 0.036191 | -0.036268 | 0.018230 | 0.091473 | 0.149409 | -0.011997 | 0.079418 | 0.949831 | 0.019014 | 0.057821 | 0.010817 | 0.045691 | 0.080241 | 0.031550 | -0.175535 | 0.071357 | 0.010432 | 0.046916 | -0.001959 | 0.047518 | -0.006317 | 0.036233 | -0.036053 | 0.018161 | 0.091598 | 0.163670 | nan |
| PBESAUT | -0.040925 | 0.216533 | 0.026701 | 0.019013 | 1.000000 | 0.031521 | 0.238382 | 0.099526 | 0.047395 | 0.153853 | -0.024618 | 0.022324 | -0.006671 | -0.007390 | 0.054893 | 0.021854 | -0.002070 | -0.006884 | -0.002552 | -0.008047 | 0.021293 | -0.042272 | 0.214862 | 0.026826 | 0.018165 | 0.999994 | 0.031403 | 0.238391 | 0.099385 | 0.047097 | 0.153848 | -0.024629 | 0.022626 | -0.006671 | -0.007390 | 0.054851 | -0.005249 | -0.002070 | -0.006884 | -0.002627 | -0.008047 | 0.021374 | -0.006945 | nan |
| PMOTSCO | 0.023527 | -0.015888 | -0.004038 | 0.057908 | 0.031521 | 1.000000 | -0.007833 | -0.003964 | -0.003073 | -0.011977 | -0.043150 | 0.036764 | 0.022904 | -0.005177 | 0.002173 | 0.010787 | -0.004520 | -0.002718 | -0.014718 | 0.013142 | 0.022248 | 0.019111 | -0.015964 | -0.003981 | 0.060993 | 0.031393 | 0.999814 | -0.007833 | -0.003987 | -0.003076 | -0.011977 | -0.043110 | 0.036714 | 0.022946 | -0.005153 | 0.002163 | -0.012002 | -0.004520 | -0.002743 | -0.014679 | 0.013159 | 0.022319 | 0.009914 | nan |
| PVRAAUT | -0.022796 | 0.144653 | -0.005708 | 0.010823 | 0.238382 | -0.007833 | 1.000000 | 0.079573 | 0.051184 | 0.070785 | -0.010624 | -0.009054 | -0.002879 | -0.003189 | 0.067218 | 0.016459 | -0.000893 | -0.002971 | 0.021535 | -0.003473 | -0.004703 | -0.023340 | 0.143625 | -0.005708 | 0.010545 | 0.238184 | -0.007834 | 1.000000 | 0.079446 | 0.050680 | 0.070672 | -0.010629 | -0.009055 | -0.002879 | -0.003189 | 0.067170 | 0.000005 | -0.000893 | -0.002971 | 0.021362 | -0.003473 | -0.004703 | -0.009921 | nan |
| PAANHANG | -0.019500 | 0.085160 | 0.099282 | 0.045688 | 0.099526 | -0.003964 | 0.079573 | 1.000000 | 0.078444 | 0.103235 | -0.008929 | 0.004821 | 0.014703 | 0.011482 | 0.019654 | 0.048699 | 0.069743 | 0.035834 | -0.006662 | 0.046318 | -0.012700 | -0.020626 | 0.084773 | 0.099720 | 0.036971 | 0.099597 | -0.004010 | 0.079530 | 0.999989 | 0.078633 | 0.103349 | -0.009300 | 0.005718 | 0.014730 | 0.011527 | 0.019636 | 0.030697 | 0.069755 | 0.035594 | -0.006726 | 0.046351 | -0.012700 | 0.014593 | nan |
| PTRACTOR | -0.075141 | 0.075760 | 0.557020 | 0.080298 | 0.047395 | -0.003073 | 0.051184 | 0.078444 | 1.000000 | 0.215082 | -0.006995 | -0.005496 | 0.049669 | 0.015186 | 0.078890 | 0.168906 | -0.003603 | 0.002618 | -0.011351 | -0.001531 | 0.019099 | -0.075954 | 0.076020 | 0.556516 | 0.084904 | 0.047440 | -0.002730 | 0.051194 | 0.078122 | 0.999956 | 0.215087 | -0.007289 | -0.006281 | 0.049647 | 0.015157 | 0.078898 | 0.100894 | -0.003603 | 0.002633 | -0.011438 | -0.001475 | 0.018860 | -0.016444 | nan |
| PWERKT | -0.024934 | 0.115155 | 0.152154 | 0.031518 | 0.153853 | -0.011977 | 0.070785 | 0.103235 | 0.215082 | 1.000000 | 0.006295 | -0.000076 | 0.074417 | -0.004877 | 0.087669 | 0.057481 | -0.001366 | -0.004543 | -0.009684 | -0.005310 | -0.007191 | -0.025988 | 0.114593 | 0.152525 | 0.032054 | 0.153608 | -0.011979 | 0.070747 | 0.103096 | 0.214361 | 0.999999 | 0.006422 | -0.000999 | 0.074268 | -0.004877 | 0.087606 | 0.030828 | -0.001366 | -0.004543 | -0.009683 | -0.005310 | -0.007191 | -0.015170 | nan |
| PBROM | -0.153253 | -0.032271 | -0.010308 | -0.176222 | -0.024618 | -0.043150 | -0.010624 | -0.008929 | -0.006995 | 0.006295 | 1.000000 | -0.046376 | -0.019755 | -0.005067 | -0.017005 | -0.176993 | -0.006131 | -0.020386 | -0.025951 | -0.023830 | -0.015362 | -0.153855 | -0.032272 | -0.010319 | -0.183393 | -0.024618 | -0.043182 | -0.010624 | -0.008924 | -0.006887 | 0.006287 | 0.999482 | -0.046699 | -0.019755 | -0.005085 | -0.017005 | -0.199225 | -0.006131 | -0.020386 | -0.026058 | -0.023831 | -0.015368 | -0.045199 | nan |
| PLEVEN | 0.139312 | 0.019661 | 0.000528 | 0.070992 | 0.022324 | 0.036764 | -0.009054 | 0.004821 | -0.005496 | -0.000076 | -0.046376 | 1.000000 | 0.037271 | 0.128552 | -0.001879 | 0.130436 | -0.005225 | 0.004139 | -0.001883 | 0.025079 | 0.027143 | 0.135659 | 0.019363 | 0.000296 | 0.068048 | 0.022213 | 0.037025 | -0.009054 | 0.004952 | -0.005406 | -0.000071 | -0.046309 | 0.999237 | 0.037279 | 0.128624 | -0.001888 | 0.105966 | -0.005225 | 0.004059 | -0.001979 | 0.025088 | 0.027114 | 0.018654 | nan |
| PPERSONG | -0.010215 | -0.008745 | 0.039775 | 0.010389 | -0.006671 | 0.022904 | -0.002879 | 0.014703 | 0.049669 | 0.074417 | -0.019755 | 0.037271 | 1.000000 | -0.005930 | -0.004608 | 0.013169 | -0.001661 | 0.025725 | -0.011775 | -0.006457 | -0.008745 | -0.011943 | -0.008745 | 0.039263 | 0.006398 | -0.006671 | 0.022291 | -0.002879 | 0.014638 | 0.049704 | 0.074539 | -0.019764 | 0.036003 | 0.999996 | -0.005930 | -0.004608 | 0.010739 | -0.001661 | 0.025865 | -0.011775 | -0.006457 | -0.008745 | -0.008504 | nan |
| PGEZONG | 0.056989 | -0.009688 | 0.018207 | 0.046971 | -0.007390 | -0.005177 | -0.003189 | 0.011482 | 0.015186 | -0.004877 | -0.005067 | 0.128552 | -0.005930 | 1.000000 | -0.005105 | 0.059531 | -0.001840 | -0.006120 | 0.014087 | 0.017177 | 0.117668 | 0.055236 | -0.009688 | 0.018200 | 0.047005 | -0.007390 | -0.005046 | -0.003189 | 0.011622 | 0.015181 | -0.004877 | -0.004976 | 0.127982 | -0.005930 | 0.999995 | -0.005105 | 0.043113 | -0.001840 | -0.006120 | 0.014678 | 0.017144 | 0.117187 | 0.033663 | nan |
| PWAOREG | -0.001413 | 0.224897 | 0.049273 | -0.001973 | 0.054893 | 0.002173 | 0.067218 | 0.019654 | 0.078890 | 0.087669 | -0.017005 | -0.001879 | -0.004608 | -0.005105 | 1.000000 | 0.053754 | -0.001430 | -0.004755 | -0.010136 | -0.005558 | -0.007527 | -0.007033 | 0.224792 | 0.048680 | -0.003710 | 0.054737 | 0.001731 | 0.067182 | 0.019650 | 0.078786 | 0.087559 | -0.017012 | -0.002230 | -0.004608 | -0.005105 | 0.999998 | 0.030579 | -0.001430 | -0.004755 | -0.010135 | -0.005558 | -0.007527 | 0.030322 | nan |
| PBRAND | 0.512962 | 0.081550 | 0.206860 | 0.113067 | 0.021854 | 0.010787 | 0.016459 | 0.048699 | 0.168906 | 0.057481 | -0.176993 | 0.130436 | 0.013169 | 0.059531 | 0.053754 | 1.000000 | 0.011413 | 0.014918 | -0.037512 | 0.036000 | 0.056083 | 0.512606 | 0.081178 | 0.206762 | 0.093533 | 0.021870 | 0.011351 | 0.016463 | 0.048626 | 0.168998 | 0.057455 | -0.176473 | 0.131198 | 0.013179 | 0.059521 | 0.053801 | 0.915881 | 0.011410 | 0.014869 | -0.037218 | 0.035995 | 0.055778 | 0.100531 | nan |
| PZEILPL | 0.013126 | -0.002714 | -0.003294 | -0.006312 | -0.002070 | -0.004520 | -0.000893 | 0.069743 | -0.003603 | -0.001366 | -0.006131 | -0.005225 | -0.001661 | -0.001840 | -0.001430 | 0.011413 | 1.000000 | 0.099603 | -0.003654 | 0.084003 | -0.002714 | 0.012227 | -0.002714 | -0.003294 | -0.008585 | -0.002070 | -0.004521 | -0.000893 | 0.069534 | -0.003603 | -0.001366 | -0.006133 | -0.005225 | -0.001661 | -0.001840 | -0.001430 | 0.004889 | 1.000000 | 0.099049 | -0.003654 | 0.084389 | -0.002714 | 0.026196 | nan |
| PPLEZIER | -0.003985 | -0.009024 | 0.004915 | 0.036191 | -0.006884 | -0.002718 | -0.002971 | 0.035834 | 0.002618 | -0.004543 | -0.020386 | 0.004139 | 0.025725 | -0.006120 | -0.004755 | 0.014918 | 0.099603 | 1.000000 | -0.012151 | 0.045675 | 0.010324 | -0.005913 | -0.009024 | 0.005073 | 0.034027 | -0.006884 | -0.003088 | -0.002971 | 0.035706 | 0.002684 | -0.004543 | -0.020395 | 0.003188 | 0.025763 | -0.006120 | -0.004755 | 0.003014 | 0.099620 | 0.999995 | -0.012151 | 0.045757 | 0.010421 | 0.106366 | nan |
| PFIETS | -0.011382 | -0.019236 | -0.015717 | -0.036268 | -0.002552 | -0.014718 | 0.021535 | -0.006662 | -0.011351 | -0.009684 | -0.025951 | -0.001883 | -0.011775 | 0.014087 | -0.010136 | -0.037512 | -0.003654 | -0.012151 | 1.000000 | 0.010756 | 0.008598 | -0.013701 | -0.019237 | -0.015642 | -0.041116 | -0.002583 | -0.014926 | 0.021559 | -0.006692 | -0.011321 | -0.009684 | -0.026125 | -0.001787 | -0.011776 | 0.014148 | -0.010136 | -0.041336 | -0.003654 | -0.012152 | 0.999939 | 0.010795 | 0.008632 | 0.028695 | nan |
| PINBOED | 0.042868 | 0.022890 | 0.001078 | 0.018230 | -0.008047 | 0.013142 | -0.003473 | 0.046318 | -0.001531 | -0.005310 | -0.023830 | 0.025079 | -0.006457 | 0.017177 | -0.005558 | 0.036000 | 0.084003 | 0.045675 | 0.010756 | 1.000000 | 0.023047 | 0.039557 | 0.022836 | 0.000937 | 0.016920 | -0.008047 | 0.013069 | -0.003473 | 0.046521 | -0.001475 | -0.005310 | -0.023841 | 0.025053 | -0.006457 | 0.017232 | -0.005558 | 0.028702 | 0.084018 | 0.045478 | 0.010601 | 0.999992 | 0.022836 | 0.019017 | nan |
| PBYSTAND | 0.047596 | -0.001808 | 0.003103 | 0.091473 | 0.021293 | 0.022248 | -0.004703 | -0.012700 | 0.019099 | -0.007191 | -0.015362 | 0.027143 | -0.008745 | 0.117668 | -0.007527 | 0.056083 | -0.002714 | 0.010324 | 0.008598 | 0.023047 | 1.000000 | 0.044489 | -0.001842 | 0.003306 | 0.097158 | 0.021211 | 0.021993 | -0.004703 | -0.012700 | 0.019121 | -0.007191 | -0.014957 | 0.025198 | -0.008745 | 0.117479 | -0.007527 | 0.037231 | -0.002714 | 0.010394 | 0.008656 | 0.022938 | 0.999970 | 0.068132 | nan |
| AWAPART | 0.989286 | -0.049170 | -0.111562 | 0.149409 | -0.042272 | 0.019111 | -0.023340 | -0.020626 | -0.075954 | -0.025988 | -0.153855 | 0.135659 | -0.011943 | 0.055236 | -0.007033 | 0.512606 | 0.012227 | -0.005913 | -0.013701 | 0.039557 | 0.044489 | 1.000000 | -0.049260 | -0.111496 | 0.142575 | -0.042225 | 0.019443 | -0.023345 | -0.020552 | -0.075988 | -0.025982 | -0.153167 | 0.137124 | -0.011886 | 0.055224 | -0.007001 | 0.563030 | 0.012225 | -0.005944 | -0.013473 | 0.039539 | 0.044533 | 0.090000 | nan |
| AWABEDR | -0.047255 | 0.999970 | 0.033855 | -0.011997 | 0.214862 | -0.015964 | 0.143625 | 0.084773 | 0.076020 | 0.114593 | -0.032272 | 0.019363 | -0.008745 | -0.009688 | 0.224792 | 0.081178 | -0.002714 | -0.009024 | -0.019237 | 0.022836 | -0.001842 | -0.049260 | 1.000000 | 0.033947 | -0.019819 | 0.214820 | -0.016202 | 0.143651 | 0.084755 | 0.075702 | 0.114514 | -0.032287 | 0.018806 | -0.008745 | -0.009688 | 0.224820 | 0.045909 | -0.002714 | -0.009024 | -0.019235 | 0.022727 | -0.001921 | 0.000601 | nan |
| AWALAND | -0.111043 | 0.033807 | 0.999944 | 0.079418 | 0.026826 | -0.003981 | -0.005708 | 0.099720 | 0.556516 | 0.152525 | -0.010319 | 0.000296 | 0.039263 | 0.018200 | 0.048680 | 0.206762 | -0.003294 | 0.005073 | -0.015642 | 0.000937 | 0.003306 | -0.111496 | 0.033947 | 1.000000 | 0.084538 | 0.026826 | -0.003682 | -0.005708 | 0.099523 | 0.556786 | 0.152498 | -0.010123 | -0.000122 | 0.039221 | 0.018267 | 0.048716 | 0.130983 | -0.003294 | 0.005145 | -0.015689 | 0.000998 | 0.003175 | -0.021279 | nan |
| APERSAUT | 0.152571 | -0.019640 | 0.084595 | 0.949831 | 0.018165 | 0.060993 | 0.010545 | 0.036971 | 0.084904 | 0.032054 | -0.183393 | 0.068048 | 0.006398 | 0.047005 | -0.003710 | 0.093533 | -0.008585 | 0.034027 | -0.041116 | 0.016920 | 0.097158 | 0.142575 | -0.019819 | 0.084538 | 1.000000 | 0.018171 | 0.060946 | 0.010538 | 0.036983 | 0.084814 | 0.032087 | -0.182709 | 0.068628 | 0.006429 | 0.046937 | -0.003699 | 0.031569 | -0.008590 | 0.034069 | -0.040895 | 0.016865 | 0.097339 | 0.149490 | nan |
| ABESAUT | -0.040878 | 0.216492 | 0.026700 | 0.019014 | 0.999994 | 0.031393 | 0.238184 | 0.099597 | 0.047440 | 0.153608 | -0.024618 | 0.022213 | -0.006671 | -0.007390 | 0.054737 | 0.021870 | -0.002070 | -0.006884 | -0.002583 | -0.008047 | 0.021211 | -0.042225 | 0.214820 | 0.026826 | 0.018171 | 1.000000 | 0.031275 | 0.238192 | 0.099456 | 0.047142 | 0.153603 | -0.024629 | 0.022514 | -0.006671 | -0.007390 | 0.054695 | -0.005220 | -0.002070 | -0.006884 | -0.002658 | -0.008047 | 0.021291 | -0.006986 | nan |
| AMOTSCO | 0.023880 | -0.016128 | -0.003740 | 0.057821 | 0.031403 | 0.999814 | -0.007834 | -0.004010 | -0.002730 | -0.011979 | -0.043182 | 0.037025 | 0.022291 | -0.005046 | 0.001731 | 0.011351 | -0.004521 | -0.003088 | -0.014926 | 0.013069 | 0.021993 | 0.019443 | -0.016202 | -0.003682 | 0.060946 | 0.031275 | 1.000000 | -0.007834 | -0.004033 | -0.002734 | -0.011979 | -0.043143 | 0.036988 | 0.022333 | -0.005022 | 0.001721 | -0.011608 | -0.004521 | -0.003113 | -0.014889 | 0.013087 | 0.022062 | 0.010358 | nan |
| AVRAAUT | -0.022800 | 0.144678 | -0.005708 | 0.010817 | 0.238391 | -0.007833 | 1.000000 | 0.079530 | 0.051194 | 0.070747 | -0.010624 | -0.009054 | -0.002879 | -0.003189 | 0.067182 | 0.016463 | -0.000893 | -0.002971 | 0.021559 | -0.003473 | -0.004703 | -0.023345 | 0.143651 | -0.005708 | 0.010538 | 0.238192 | -0.007834 | 1.000000 | 0.079403 | 0.050691 | 0.070635 | -0.010629 | -0.009055 | -0.002879 | -0.003189 | 0.067134 | 0.000006 | -0.000893 | -0.002971 | 0.021386 | -0.003473 | -0.004703 | -0.009921 | nan |
| AAANHANG | -0.019434 | 0.085141 | 0.099085 | 0.045691 | 0.099385 | -0.003987 | 0.079446 | 0.999989 | 0.078122 | 0.103096 | -0.008924 | 0.004952 | 0.014638 | 0.011622 | 0.019650 | 0.048626 | 0.069534 | 0.035706 | -0.006692 | 0.046521 | -0.012700 | -0.020552 | 0.084755 | 0.099523 | 0.036983 | 0.099456 | -0.004033 | 0.079403 | 1.000000 | 0.078309 | 0.103210 | -0.009293 | 0.005852 | 0.014665 | 0.011667 | 0.019632 | 0.030662 | 0.069546 | 0.035467 | -0.006756 | 0.046553 | -0.012700 | 0.014540 | nan |
| ATRACTOR | -0.075159 | 0.075443 | 0.557292 | 0.080241 | 0.047097 | -0.003076 | 0.050680 | 0.078633 | 0.999956 | 0.214361 | -0.006887 | -0.005406 | 0.049704 | 0.015181 | 0.078786 | 0.168998 | -0.003603 | 0.002684 | -0.011321 | -0.001475 | 0.019121 | -0.075988 | 0.075702 | 0.556786 | 0.084814 | 0.047142 | -0.002734 | 0.050691 | 0.078309 | 1.000000 | 0.214364 | -0.007182 | -0.006192 | 0.049682 | 0.015152 | 0.078794 | 0.101040 | -0.003603 | 0.002699 | -0.011409 | -0.001419 | 0.018881 | -0.016650 | nan |
| AWERKT | -0.024928 | 0.115076 | 0.152128 | 0.031550 | 0.153848 | -0.011977 | 0.070672 | 0.103349 | 0.215087 | 0.999999 | 0.006287 | -0.000071 | 0.074539 | -0.004877 | 0.087559 | 0.057455 | -0.001366 | -0.004543 | -0.009684 | -0.005310 | -0.007191 | -0.025982 | 0.114514 | 0.152498 | 0.032087 | 0.153603 | -0.011979 | 0.070635 | 0.103210 | 0.214364 | 1.000000 | 0.006414 | -0.000995 | 0.074390 | -0.004877 | 0.087496 | 0.030810 | -0.001366 | -0.004543 | -0.009683 | -0.005310 | -0.007191 | -0.015170 | nan |
| ABROM | -0.152564 | -0.032286 | -0.010112 | -0.175535 | -0.024629 | -0.043110 | -0.010629 | -0.009300 | -0.007289 | 0.006422 | 0.999482 | -0.046309 | -0.019764 | -0.004976 | -0.017012 | -0.176473 | -0.006133 | -0.020395 | -0.026125 | -0.023841 | -0.014957 | -0.153167 | -0.032287 | -0.010123 | -0.182709 | -0.024629 | -0.043143 | -0.010629 | -0.009293 | -0.007182 | 0.006414 | 1.000000 | -0.046634 | -0.019764 | -0.004994 | -0.017012 | -0.198651 | -0.006133 | -0.020395 | -0.026231 | -0.023841 | -0.014965 | -0.045145 | nan |
| ALEVEN | 0.140592 | 0.019100 | 0.000111 | 0.071357 | 0.022626 | 0.036714 | -0.009055 | 0.005718 | -0.006281 | -0.000999 | -0.046699 | 0.999237 | 0.036003 | 0.127982 | -0.002230 | 0.131198 | -0.005225 | 0.003188 | -0.001787 | 0.025053 | 0.025198 | 0.137124 | 0.018806 | -0.000122 | 0.068628 | 0.022514 | 0.036988 | -0.009055 | 0.005852 | -0.006192 | -0.000995 | -0.046634 | 1.000000 | 0.036012 | 0.128055 | -0.002239 | 0.106750 | -0.005225 | 0.003111 | -0.001877 | 0.025061 | 0.025169 | 0.019285 | nan |
| APERSONG | -0.010157 | -0.008745 | 0.039732 | 0.010432 | -0.006671 | 0.022946 | -0.002879 | 0.014730 | 0.049647 | 0.074268 | -0.019755 | 0.037279 | 0.999996 | -0.005930 | -0.004608 | 0.013179 | -0.001661 | 0.025763 | -0.011776 | -0.006457 | -0.008745 | -0.011886 | -0.008745 | 0.039221 | 0.006429 | -0.006671 | 0.022333 | -0.002879 | 0.014665 | 0.049682 | 0.074390 | -0.019764 | 0.036012 | 1.000000 | -0.005930 | -0.004608 | 0.010772 | -0.001661 | 0.025903 | -0.011775 | -0.006457 | -0.008745 | -0.008492 | nan |
| AGEZONG | 0.056973 | -0.009688 | 0.018274 | 0.046916 | -0.007390 | -0.005153 | -0.003189 | 0.011527 | 0.015157 | -0.004877 | -0.005085 | 0.128624 | -0.005930 | 0.999995 | -0.005105 | 0.059521 | -0.001840 | -0.006120 | 0.014148 | 0.017232 | 0.117479 | 0.055224 | -0.009688 | 0.018267 | 0.046937 | -0.007390 | -0.005022 | -0.003189 | 0.011667 | 0.015152 | -0.004877 | -0.004994 | 0.128055 | -0.005930 | 1.000000 | -0.005105 | 0.043093 | -0.001840 | -0.006120 | 0.014740 | 0.017198 | 0.116997 | 0.033548 | nan |
| AWAOREG | -0.001384 | 0.224926 | 0.049310 | -0.001959 | 0.054851 | 0.002163 | 0.067170 | 0.019636 | 0.078898 | 0.087606 | -0.017005 | -0.001888 | -0.004608 | -0.005105 | 0.999998 | 0.053801 | -0.001430 | -0.004755 | -0.010136 | -0.005558 | -0.007527 | -0.007001 | 0.224820 | 0.048716 | -0.003699 | 0.054695 | 0.001721 | 0.067134 | 0.019632 | 0.078794 | 0.087496 | -0.017012 | -0.002239 | -0.004608 | -0.005105 | 1.000000 | 0.030626 | -0.001430 | -0.004755 | -0.010135 | -0.005558 | -0.007527 | 0.030290 | nan |
| ABRAND | 0.558860 | 0.046095 | 0.130971 | 0.047518 | -0.005249 | -0.012002 | 0.000005 | 0.030697 | 0.100894 | 0.030828 | -0.199225 | 0.105966 | 0.010739 | 0.043113 | 0.030579 | 0.915881 | 0.004889 | 0.003014 | -0.041336 | 0.028702 | 0.037231 | 0.563030 | 0.045909 | 0.130983 | 0.031569 | -0.005220 | -0.011608 | 0.000006 | 0.030662 | 0.101040 | 0.030810 | -0.198651 | 0.106750 | 0.010772 | 0.043093 | 0.030626 | 1.000000 | 0.004887 | 0.002976 | -0.041102 | 0.028684 | 0.036986 | 0.069493 | nan |
| AZEILPL | 0.013123 | -0.002714 | -0.003294 | -0.006317 | -0.002070 | -0.004520 | -0.000893 | 0.069755 | -0.003603 | -0.001366 | -0.006131 | -0.005225 | -0.001661 | -0.001840 | -0.001430 | 0.011410 | 1.000000 | 0.099620 | -0.003654 | 0.084018 | -0.002714 | 0.012225 | -0.002714 | -0.003294 | -0.008590 | -0.002070 | -0.004521 | -0.000893 | 0.069546 | -0.003603 | -0.001366 | -0.006133 | -0.005225 | -0.001661 | -0.001840 | -0.001430 | 0.004887 | 1.000000 | 0.099067 | -0.003654 | 0.084404 | -0.002714 | 0.026201 | nan |
| APLEZIER | -0.004017 | -0.009024 | 0.004987 | 0.036233 | -0.006884 | -0.002743 | -0.002971 | 0.035594 | 0.002633 | -0.004543 | -0.020386 | 0.004059 | 0.025865 | -0.006120 | -0.004755 | 0.014869 | 0.099049 | 0.999995 | -0.012152 | 0.045478 | 0.010394 | -0.005944 | -0.009024 | 0.005145 | 0.034069 | -0.006884 | -0.003113 | -0.002971 | 0.035467 | 0.002699 | -0.004543 | -0.020395 | 0.003111 | 0.025903 | -0.006120 | -0.004755 | 0.002976 | 0.099067 | 1.000000 | -0.012151 | 0.045559 | 0.010491 | 0.106438 | nan |
| AFIETS | -0.011138 | -0.019235 | -0.015763 | -0.036053 | -0.002627 | -0.014679 | 0.021362 | -0.006726 | -0.011438 | -0.009683 | -0.026058 | -0.001979 | -0.011775 | 0.014678 | -0.010135 | -0.037218 | -0.003654 | -0.012151 | 0.999939 | 0.010601 | 0.008656 | -0.013473 | -0.019235 | -0.015689 | -0.040895 | -0.002658 | -0.014889 | 0.021386 | -0.006756 | -0.011409 | -0.009683 | -0.026231 | -0.001877 | -0.011775 | 0.014740 | -0.010135 | -0.041102 | -0.003654 | -0.012151 | 1.000000 | 0.010639 | 0.008691 | 0.028869 | nan |
| AINBOED | 0.042850 | 0.022781 | 0.001140 | 0.018161 | -0.008047 | 0.013159 | -0.003473 | 0.046351 | -0.001475 | -0.005310 | -0.023831 | 0.025088 | -0.006457 | 0.017144 | -0.005558 | 0.035995 | 0.084389 | 0.045757 | 0.010795 | 0.999992 | 0.022938 | 0.039539 | 0.022727 | 0.000998 | 0.016865 | -0.008047 | 0.013087 | -0.003473 | 0.046553 | -0.001419 | -0.005310 | -0.023841 | 0.025061 | -0.006457 | 0.017198 | -0.005558 | 0.028684 | 0.084404 | 0.045559 | 0.010639 | 1.000000 | 0.022727 | 0.019106 | nan |
| ABYSTAND | 0.047635 | -0.001887 | 0.002974 | 0.091598 | 0.021374 | 0.022319 | -0.004703 | -0.012700 | 0.018860 | -0.007191 | -0.015368 | 0.027114 | -0.008745 | 0.117187 | -0.007527 | 0.055778 | -0.002714 | 0.010421 | 0.008632 | 0.022836 | 0.999970 | 0.044533 | -0.001921 | 0.003175 | 0.097339 | 0.021291 | 0.022062 | -0.004703 | -0.012700 | 0.018881 | -0.007191 | -0.014965 | 0.025169 | -0.008745 | 0.116997 | -0.007527 | 0.036986 | -0.002714 | 0.010491 | 0.008691 | 0.022727 | 1.000000 | 0.068222 | nan |
| CARAVAN | 0.095332 | 0.000573 | -0.021325 | 0.163670 | -0.006945 | 0.009914 | -0.009921 | 0.014593 | -0.016444 | -0.015170 | -0.045199 | 0.018654 | -0.008504 | 0.033663 | 0.030322 | 0.100531 | 0.026196 | 0.106366 | 0.028695 | 0.019017 | 0.068132 | 0.090000 | 0.000601 | -0.021279 | 0.149490 | -0.006986 | 0.010358 | -0.009921 | 0.014540 | -0.016650 | -0.015170 | -0.045145 | 0.019285 | -0.008492 | 0.033548 | 0.030290 | 0.069493 | 0.026201 | 0.106438 | 0.028869 | 0.019106 | 0.068222 | 1.000000 | nan |
| TRAIN | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan | nan |
def cramers_v(var1, var2):
    """Return the bias-corrected Cramér's V association of two variables.

    The statistic is derived from the chi-squared statistic of the
    contingency table of ``var1`` against ``var2``, with the bias
    correction applied to phi squared and to the table dimensions.

    Args:
        var1: first categorical variable (anything ``pd.crosstab`` accepts).
        var2: second categorical variable, aligned with ``var1``.

    Returns:
        The corrected Cramér's V. 0.0 is returned when the statistic is
        undefined: either variable has fewer than two observed categories,
        or the bias correction leaves a non-positive denominator. The
        uncorrected formula would divide by zero there and yield NaN.
    """
    crosstab = np.array(pd.crosstab(var1, var2))
    n = crosstab.sum()
    r, k = crosstab.shape

    # A variable with a single category (or no data at all) cannot be
    # associated with anything; bail out before dividing by zero below.
    if min(r, k) < 2:
        return 0.0

    # NOTE: chi2_contingency applies Yates' continuity correction to 2x2
    # tables by default; that default is kept on purpose.
    chi2 = ss.chi2_contingency(crosstab)[0]
    phi2 = chi2 / n

    # Bias correction of phi squared and of the table dimensions.
    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
    rcorr = r - ((r - 1) ** 2) / (n - 1)
    kcorr = k - ((k - 1) ** 2) / (n - 1)

    # With very small samples the corrected dimensions can collapse to 1,
    # which makes the denominator zero.
    denom = min(kcorr - 1, rcorr - 1)
    if denom <= 0:
        return 0.0
    return np.sqrt(phi2corr / denom)
We use Cramér's V to examine pairs of categorical variables that we expected to be highly correlated, based on the descriptions in our data dictionary.
# MOSTYPE vs MOSHOOFD (presumably customer subtype vs customer main type -
# confirm against the data dictionary)
cramers_v(df_ticdata_categorical_variables["MOSTYPE"], df_ticdata_categorical_variables["MOSHOOFD"])
0.9974157937677048
# Average income (MINKGEM) vs purchasing power class (MKOOPKLA)
cramers_v(df_ticdata_categorical_variables["MINKGEM"], df_ticdata_categorical_variables["MKOOPKLA"])
0.22021007250782393
# High status (MBERHOOG) vs income > 123.000 (MINK123M)
cramers_v(df_ticdata_categorical_variables["MBERHOOG"], df_ticdata_categorical_variables["MINK123M"])
0.16893247903544156
# High level of education (MOPLHOOG) vs income > 123.000 (MINK123M)
cramers_v(df_ticdata_categorical_variables["MOPLHOOG"], df_ticdata_categorical_variables["MINK123M"])
0.1991300006594408
# Social class D (MSKD) vs unskilled labourers (MBERARBO)
cramers_v(df_ticdata_categorical_variables["MSKD"], df_ticdata_categorical_variables["MBERARBO"])
0.26855759802471696
After checking several pairs of variables that we expected to be highly correlated, we see that only Customer subtype (MOSTYPE) and Customer main type (MOSHOOFD) actually are. For a future model we could drop one of them.
# Pairwise Cramér's V, rounded to two decimals, for every pair of categorical columns.
rows = [
    [
        round(
            cramers_v(
                df_ticdata_categorical_variables[row_var],
                df_ticdata_categorical_variables[col_var],
            ),
            2,
        )
        for col_var in df_ticdata_categorical_variables
    ]
    for row_var in df_ticdata_categorical_variables
]
cramers_results = np.array(rows)
# Label both axes with the column names so the matrix reads as a table.
df_vcramer = pd.DataFrame(
    cramers_results,
    index = df_ticdata_categorical_variables.columns,
    columns = df_ticdata_categorical_variables.columns,
)
df_vcramer
| MOSTYPE | MOSHOOFD | MGODRK | MGODPR | MGODOV | MGODGE | MRELGE | MRELSA | MRELOV | MFALLEEN | MFGEKIND | MFWEKIND | MOPLHOOG | MOPLMIDD | MOPLLAAG | MBERHOOG | MBERZELF | MBERBOER | MBERMIDD | MBERARBG | MBERARBO | MSKA | MSKB1 | MSKB2 | MSKC | MSKD | MHHUUR | MHKOOP | MAUT1 | MAUT2 | MAUT0 | MZFONDS | MZPART | MINKM30 | MINK3045 | MINK4575 | MINK7512 | MINK123M | MINKGEM | MKOOPKLA | PWAPART | PWABEDR | PWALAND | PPERSAUT | PBESAUT | PMOTSCO | PVRAAUT | PAANHANG | PTRACTOR | PWERKT | PBROM | PLEVEN | PPERSONG | PGEZONG | PWAOREG | PBRAND | PZEILPL | PPLEZIER | PFIETS | PINBOED | PBYSTAND | CARAVAN | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| MOSTYPE | 1.00 | 1.00 | 0.16 | 0.19 | 0.21 | 0.19 | 0.28 | 0.20 | 0.29 | 0.28 | 0.18 | 0.27 | 0.29 | 0.22 | 0.30 | 0.26 | 0.30 | 0.33 | 0.21 | 0.23 | 0.23 | 0.27 | 0.18 | 0.21 | 0.25 | 0.22 | 0.29 | 0.29 | 0.24 | 0.24 | 0.28 | 0.26 | 0.26 | 0.23 | 0.21 | 0.25 | 0.16 | 0.14 | 0.26 | 0.95 | 0.09 | 0.02 | 0.10 | 0.04 | 0.05 | 0.05 | 0.00 | 0.07 | 0.12 | 0.08 | 0.08 | 0.00 | 0.00 | 0.02 | 0.05 | 0.17 | 0.00 | 0.02 | 0.05 | 0.03 | 0.04 | 0.12 |
| MOSHOOFD | 1.00 | 1.00 | 0.09 | 0.11 | 0.12 | 0.10 | 0.21 | 0.14 | 0.20 | 0.20 | 0.11 | 0.20 | 0.21 | 0.15 | 0.22 | 0.18 | 0.12 | 0.22 | 0.13 | 0.16 | 0.17 | 0.19 | 0.12 | 0.13 | 0.19 | 0.15 | 0.21 | 0.21 | 0.15 | 0.14 | 0.18 | 0.19 | 0.19 | 0.17 | 0.13 | 0.18 | 0.10 | 0.08 | 0.20 | 0.61 | 0.07 | 0.02 | 0.11 | 0.05 | 0.00 | 0.03 | 0.01 | 0.04 | 0.13 | 0.07 | 0.04 | 0.04 | 0.02 | 0.03 | 0.00 | 0.16 | 0.03 | 0.00 | 0.03 | 0.02 | 0.05 | 0.12 |
| MGODRK | 0.16 | 0.09 | 1.00 | 0.22 | 0.17 | 0.12 | 0.10 | 0.17 | 0.10 | 0.07 | 0.07 | 0.08 | 0.12 | 0.09 | 0.11 | 0.12 | 0.16 | 0.06 | 0.09 | 0.09 | 0.08 | 0.10 | 0.09 | 0.08 | 0.10 | 0.07 | 0.09 | 0.09 | 0.13 | 0.14 | 0.12 | 0.14 | 0.14 | 0.10 | 0.08 | 0.08 | 0.14 | 0.13 | 0.13 | 0.10 | 0.02 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 |
| MGODPR | 0.19 | 0.11 | 0.22 | 1.00 | 0.25 | 0.43 | 0.14 | 0.17 | 0.13 | 0.10 | 0.11 | 0.11 | 0.11 | 0.11 | 0.11 | 0.13 | 0.14 | 0.11 | 0.13 | 0.12 | 0.12 | 0.11 | 0.09 | 0.12 | 0.11 | 0.12 | 0.14 | 0.14 | 0.15 | 0.16 | 0.15 | 0.12 | 0.12 | 0.12 | 0.10 | 0.11 | 0.08 | 0.08 | 0.12 | 0.10 | 0.00 | 0.00 | 0.04 | 0.06 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.03 | 0.02 | 0.00 | 0.01 | 0.00 | 0.00 | 0.06 | 0.00 | 0.02 | 0.00 | 0.00 | 0.02 | 0.05 |
| MGODOV | 0.21 | 0.12 | 0.17 | 0.25 | 1.00 | 0.16 | 0.18 | 0.24 | 0.20 | 0.14 | 0.13 | 0.14 | 0.12 | 0.12 | 0.10 | 0.15 | 0.12 | 0.20 | 0.13 | 0.15 | 0.13 | 0.15 | 0.12 | 0.13 | 0.11 | 0.13 | 0.16 | 0.16 | 0.16 | 0.14 | 0.20 | 0.16 | 0.16 | 0.11 | 0.12 | 0.15 | 0.08 | 0.09 | 0.10 | 0.09 | 0.02 | 0.00 | 0.02 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.02 | 0.02 | 0.03 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.03 |
| MGODGE | 0.19 | 0.10 | 0.12 | 0.43 | 0.16 | 1.00 | 0.14 | 0.16 | 0.14 | 0.09 | 0.12 | 0.11 | 0.09 | 0.10 | 0.10 | 0.13 | 0.14 | 0.10 | 0.11 | 0.11 | 0.10 | 0.10 | 0.08 | 0.10 | 0.10 | 0.11 | 0.14 | 0.14 | 0.12 | 0.11 | 0.13 | 0.14 | 0.14 | 0.12 | 0.09 | 0.11 | 0.04 | 0.09 | 0.11 | 0.09 | 0.00 | 0.00 | 0.03 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.01 | 0.05 | 0.00 | 0.00 | 0.04 | 0.00 | 0.01 | 0.04 |
| MRELGE | 0.28 | 0.21 | 0.10 | 0.14 | 0.18 | 0.14 | 1.00 | 0.31 | 0.61 | 0.35 | 0.12 | 0.25 | 0.12 | 0.12 | 0.14 | 0.12 | 0.10 | 0.11 | 0.11 | 0.12 | 0.17 | 0.11 | 0.12 | 0.10 | 0.11 | 0.16 | 0.20 | 0.20 | 0.24 | 0.16 | 0.31 | 0.18 | 0.18 | 0.21 | 0.12 | 0.15 | 0.10 | 0.07 | 0.19 | 0.17 | 0.04 | 0.00 | 0.02 | 0.02 | 0.00 | 0.03 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.01 | 0.00 | 0.01 | 0.00 | 0.07 | 0.00 | 0.02 | 0.04 | 0.03 | 0.02 | 0.06 |
| MRELSA | 0.20 | 0.14 | 0.17 | 0.17 | 0.24 | 0.16 | 0.31 | 1.00 | 0.16 | 0.13 | 0.14 | 0.12 | 0.09 | 0.10 | 0.09 | 0.12 | 0.14 | 0.09 | 0.12 | 0.11 | 0.12 | 0.11 | 0.12 | 0.10 | 0.10 | 0.11 | 0.15 | 0.14 | 0.13 | 0.14 | 0.16 | 0.17 | 0.17 | 0.10 | 0.09 | 0.11 | 0.09 | 0.07 | 0.08 | 0.08 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.01 |
| MRELOV | 0.29 | 0.20 | 0.10 | 0.13 | 0.20 | 0.14 | 0.61 | 0.16 | 1.00 | 0.46 | 0.13 | 0.22 | 0.10 | 0.10 | 0.13 | 0.11 | 0.12 | 0.10 | 0.11 | 0.12 | 0.17 | 0.10 | 0.09 | 0.12 | 0.11 | 0.18 | 0.19 | 0.19 | 0.31 | 0.18 | 0.39 | 0.16 | 0.16 | 0.23 | 0.12 | 0.14 | 0.09 | 0.07 | 0.23 | 0.17 | 0.04 | 0.00 | 0.04 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.06 | 0.00 | 0.00 | 0.06 | 0.02 | 0.00 | 0.06 |
| MFALLEEN | 0.28 | 0.20 | 0.07 | 0.10 | 0.14 | 0.09 | 0.35 | 0.13 | 0.46 | 1.00 | 0.18 | 0.32 | 0.14 | 0.12 | 0.14 | 0.12 | 0.13 | 0.10 | 0.11 | 0.13 | 0.12 | 0.12 | 0.18 | 0.16 | 0.12 | 0.18 | 0.19 | 0.19 | 0.21 | 0.12 | 0.28 | 0.12 | 0.12 | 0.23 | 0.13 | 0.15 | 0.10 | 0.08 | 0.20 | 0.16 | 0.04 | 0.00 | 0.01 | 0.01 | 0.01 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.07 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.04 |
| MFGEKIND | 0.18 | 0.11 | 0.07 | 0.11 | 0.13 | 0.12 | 0.12 | 0.14 | 0.13 | 0.18 | 1.00 | 0.44 | 0.11 | 0.18 | 0.15 | 0.12 | 0.13 | 0.08 | 0.16 | 0.12 | 0.11 | 0.16 | 0.16 | 0.16 | 0.12 | 0.11 | 0.14 | 0.14 | 0.12 | 0.10 | 0.10 | 0.10 | 0.10 | 0.13 | 0.14 | 0.12 | 0.08 | 0.09 | 0.13 | 0.11 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.05 | 0.00 | 0.03 | 0.00 | 0.00 | 0.02 | 0.00 | 0.03 | 0.00 |
| MFWEKIND | 0.27 | 0.20 | 0.08 | 0.11 | 0.14 | 0.11 | 0.25 | 0.12 | 0.22 | 0.32 | 0.44 | 1.00 | 0.10 | 0.14 | 0.14 | 0.11 | 0.10 | 0.11 | 0.14 | 0.13 | 0.11 | 0.12 | 0.14 | 0.15 | 0.11 | 0.14 | 0.17 | 0.17 | 0.16 | 0.13 | 0.18 | 0.09 | 0.09 | 0.16 | 0.13 | 0.15 | 0.09 | 0.08 | 0.13 | 0.16 | 0.04 | 0.01 | 0.03 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.02 | 0.00 | 0.00 | 0.02 | 0.03 | 0.07 | 0.00 | 0.00 | 0.03 | 0.02 | 0.01 | 0.02 |
| MOPLHOOG | 0.29 | 0.21 | 0.12 | 0.11 | 0.12 | 0.09 | 0.12 | 0.09 | 0.10 | 0.14 | 0.11 | 0.10 | 1.00 | 0.17 | 0.29 | 0.31 | 0.26 | 0.12 | 0.15 | 0.18 | 0.16 | 0.37 | 0.20 | 0.16 | 0.24 | 0.15 | 0.18 | 0.18 | 0.11 | 0.12 | 0.13 | 0.25 | 0.24 | 0.16 | 0.17 | 0.20 | 0.21 | 0.20 | 0.24 | 0.21 | 0.03 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.06 | 0.03 | 0.03 | 0.08 |
| MOPLMIDD | 0.22 | 0.15 | 0.09 | 0.11 | 0.12 | 0.10 | 0.12 | 0.10 | 0.10 | 0.12 | 0.18 | 0.14 | 0.17 | 1.00 | 0.54 | 0.14 | 0.14 | 0.12 | 0.25 | 0.19 | 0.13 | 0.16 | 0.24 | 0.22 | 0.22 | 0.19 | 0.15 | 0.15 | 0.11 | 0.10 | 0.09 | 0.13 | 0.13 | 0.13 | 0.16 | 0.14 | 0.09 | 0.07 | 0.13 | 0.13 | 0.01 | 0.00 | 0.00 | 0.03 | 0.02 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.02 | 0.04 | 0.00 | 0.04 | 0.00 | 0.03 | 0.00 | 0.00 | 0.04 | 0.00 | 0.03 | 0.04 |
| MOPLLAAG | 0.30 | 0.22 | 0.11 | 0.11 | 0.10 | 0.10 | 0.14 | 0.09 | 0.13 | 0.14 | 0.15 | 0.14 | 0.29 | 0.54 | 1.00 | 0.26 | 0.19 | 0.14 | 0.21 | 0.25 | 0.21 | 0.31 | 0.24 | 0.21 | 0.39 | 0.23 | 0.20 | 0.19 | 0.13 | 0.12 | 0.13 | 0.22 | 0.22 | 0.18 | 0.17 | 0.20 | 0.14 | 0.11 | 0.21 | 0.22 | 0.04 | 0.00 | 0.03 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.01 | 0.03 | 0.00 | 0.04 | 0.00 | 0.04 | 0.00 | 0.00 | 0.04 | 0.03 | 0.02 | 0.09 |
| MBERHOOG | 0.26 | 0.18 | 0.12 | 0.13 | 0.15 | 0.13 | 0.12 | 0.12 | 0.11 | 0.12 | 0.12 | 0.11 | 0.31 | 0.14 | 0.26 | 1.00 | 0.18 | 0.11 | 0.18 | 0.20 | 0.21 | 0.49 | 0.17 | 0.19 | 0.25 | 0.15 | 0.18 | 0.18 | 0.14 | 0.17 | 0.15 | 0.30 | 0.30 | 0.15 | 0.13 | 0.21 | 0.18 | 0.17 | 0.22 | 0.21 | 0.04 | 0.02 | 0.01 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.02 | 0.00 | 0.03 | 0.02 | 0.04 | 0.04 | 0.00 | 0.05 | 0.04 | 0.00 | 0.07 |
| MBERZELF | 0.30 | 0.12 | 0.16 | 0.14 | 0.12 | 0.14 | 0.10 | 0.14 | 0.12 | 0.13 | 0.13 | 0.10 | 0.26 | 0.14 | 0.19 | 0.18 | 1.00 | 0.25 | 0.16 | 0.16 | 0.17 | 0.34 | 0.14 | 0.14 | 0.16 | 0.17 | 0.16 | 0.16 | 0.12 | 0.18 | 0.13 | 0.18 | 0.18 | 0.15 | 0.14 | 0.23 | 0.19 | 0.17 | 0.20 | 0.10 | 0.01 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.02 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.08 | 0.03 | 0.02 | 0.02 | 0.00 | 0.03 | 0.00 |
| MBERBOER | 0.33 | 0.22 | 0.06 | 0.11 | 0.20 | 0.10 | 0.11 | 0.09 | 0.10 | 0.10 | 0.08 | 0.11 | 0.12 | 0.12 | 0.14 | 0.11 | 0.25 | 1.00 | 0.14 | 0.11 | 0.12 | 0.18 | 0.09 | 0.17 | 0.13 | 0.14 | 0.15 | 0.15 | 0.12 | 0.14 | 0.11 | 0.13 | 0.13 | 0.11 | 0.11 | 0.15 | 0.09 | 0.06 | 0.09 | 0.12 | 0.04 | 0.02 | 0.12 | 0.02 | 0.05 | 0.00 | 0.02 | 0.06 | 0.14 | 0.13 | 0.03 | 0.02 | 0.03 | 0.00 | 0.00 | 0.09 | 0.07 | 0.00 | 0.04 | 0.00 | 0.00 | 0.04 |
| MBERMIDD | 0.21 | 0.13 | 0.09 | 0.13 | 0.13 | 0.11 | 0.11 | 0.12 | 0.11 | 0.11 | 0.16 | 0.14 | 0.15 | 0.25 | 0.21 | 0.18 | 0.16 | 0.14 | 1.00 | 0.21 | 0.20 | 0.16 | 0.29 | 0.17 | 0.17 | 0.15 | 0.13 | 0.13 | 0.15 | 0.13 | 0.12 | 0.14 | 0.14 | 0.13 | 0.15 | 0.12 | 0.10 | 0.10 | 0.14 | 0.12 | 0.00 | 0.00 | 0.02 | 0.02 | 0.00 | 0.02 | 0.00 | 0.00 | 0.03 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.04 | 0.00 | 0.00 | 0.02 | 0.00 | 0.02 | 0.05 |
| MBERARBG | 0.23 | 0.16 | 0.09 | 0.12 | 0.15 | 0.11 | 0.12 | 0.11 | 0.12 | 0.13 | 0.12 | 0.13 | 0.18 | 0.19 | 0.25 | 0.20 | 0.16 | 0.11 | 0.21 | 1.00 | 0.17 | 0.18 | 0.16 | 0.15 | 0.33 | 0.15 | 0.16 | 0.16 | 0.12 | 0.14 | 0.12 | 0.18 | 0.18 | 0.13 | 0.13 | 0.16 | 0.14 | 0.12 | 0.14 | 0.15 | 0.02 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.03 | 0.00 | 0.00 | 0.04 | 0.00 | 0.00 | 0.06 |
| MBERARBO | 0.23 | 0.17 | 0.08 | 0.12 | 0.13 | 0.10 | 0.17 | 0.12 | 0.17 | 0.12 | 0.11 | 0.11 | 0.16 | 0.13 | 0.21 | 0.21 | 0.17 | 0.12 | 0.20 | 0.17 | 1.00 | 0.19 | 0.13 | 0.14 | 0.21 | 0.27 | 0.19 | 0.18 | 0.15 | 0.12 | 0.17 | 0.17 | 0.17 | 0.20 | 0.14 | 0.15 | 0.13 | 0.11 | 0.17 | 0.17 | 0.01 | 0.00 | 0.00 | 0.01 | 0.02 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.06 | 0.00 | 0.02 | 0.05 |
| MSKA | 0.27 | 0.19 | 0.10 | 0.11 | 0.15 | 0.10 | 0.11 | 0.11 | 0.10 | 0.12 | 0.16 | 0.12 | 0.37 | 0.16 | 0.31 | 0.49 | 0.34 | 0.18 | 0.16 | 0.18 | 0.19 | 1.00 | 0.15 | 0.16 | 0.27 | 0.16 | 0.19 | 0.19 | 0.11 | 0.14 | 0.14 | 0.29 | 0.29 | 0.15 | 0.17 | 0.26 | 0.18 | 0.16 | 0.22 | 0.18 | 0.03 | 0.00 | 0.02 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.01 | 0.03 | 0.00 | 0.01 | 0.02 | 0.05 | 0.05 | 0.00 | 0.04 | 0.05 | 0.04 | 0.09 |
| MSKB1 | 0.18 | 0.12 | 0.09 | 0.09 | 0.12 | 0.08 | 0.12 | 0.12 | 0.09 | 0.18 | 0.16 | 0.14 | 0.20 | 0.24 | 0.24 | 0.17 | 0.14 | 0.09 | 0.29 | 0.16 | 0.13 | 0.15 | 1.00 | 0.15 | 0.18 | 0.17 | 0.11 | 0.11 | 0.09 | 0.09 | 0.08 | 0.12 | 0.12 | 0.12 | 0.12 | 0.17 | 0.11 | 0.16 | 0.12 | 0.12 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.00 | 0.03 | 0.00 | 0.00 | 0.04 | 0.03 | 0.00 | 0.05 |
| MSKB2 | 0.21 | 0.13 | 0.08 | 0.12 | 0.13 | 0.10 | 0.10 | 0.10 | 0.12 | 0.16 | 0.16 | 0.15 | 0.16 | 0.22 | 0.21 | 0.19 | 0.14 | 0.17 | 0.17 | 0.15 | 0.14 | 0.16 | 0.15 | 1.00 | 0.24 | 0.17 | 0.15 | 0.15 | 0.10 | 0.10 | 0.12 | 0.12 | 0.12 | 0.13 | 0.14 | 0.15 | 0.12 | 0.09 | 0.13 | 0.10 | 0.00 | 0.06 | 0.02 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.06 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.25 | 0.03 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| MSKC | 0.25 | 0.19 | 0.10 | 0.11 | 0.11 | 0.10 | 0.11 | 0.10 | 0.11 | 0.12 | 0.12 | 0.11 | 0.24 | 0.22 | 0.39 | 0.25 | 0.16 | 0.13 | 0.17 | 0.33 | 0.21 | 0.27 | 0.18 | 0.24 | 1.00 | 0.16 | 0.19 | 0.20 | 0.11 | 0.12 | 0.12 | 0.22 | 0.22 | 0.16 | 0.17 | 0.17 | 0.14 | 0.11 | 0.18 | 0.18 | 0.02 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.03 | 0.00 | 0.02 | 0.01 | 0.02 | 0.00 | 0.07 |
| MSKD | 0.22 | 0.15 | 0.07 | 0.12 | 0.13 | 0.11 | 0.16 | 0.11 | 0.18 | 0.18 | 0.11 | 0.14 | 0.15 | 0.19 | 0.23 | 0.15 | 0.17 | 0.14 | 0.15 | 0.15 | 0.27 | 0.16 | 0.17 | 0.17 | 0.16 | 1.00 | 0.17 | 0.17 | 0.18 | 0.10 | 0.21 | 0.16 | 0.16 | 0.18 | 0.14 | 0.17 | 0.11 | 0.11 | 0.16 | 0.16 | 0.03 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.04 | 0.00 | 0.04 | 0.00 | 0.00 | 0.00 | 0.06 |
| MHHUUR | 0.29 | 0.21 | 0.09 | 0.14 | 0.16 | 0.14 | 0.20 | 0.15 | 0.19 | 0.19 | 0.14 | 0.17 | 0.18 | 0.15 | 0.20 | 0.18 | 0.16 | 0.15 | 0.13 | 0.16 | 0.19 | 0.19 | 0.11 | 0.15 | 0.19 | 0.17 | 1.00 | 0.99 | 0.16 | 0.19 | 0.18 | 0.20 | 0.20 | 0.22 | 0.11 | 0.19 | 0.15 | 0.11 | 0.19 | 0.21 | 0.03 | 0.02 | 0.04 | 0.04 | 0.00 | 0.00 | 0.01 | 0.01 | 0.05 | 0.00 | 0.00 | 0.01 | 0.00 | 0.03 | 0.02 | 0.11 | 0.00 | 0.00 | 0.03 | 0.00 | 0.03 | 0.08 |
| MHKOOP | 0.29 | 0.21 | 0.09 | 0.14 | 0.16 | 0.14 | 0.20 | 0.14 | 0.19 | 0.19 | 0.14 | 0.17 | 0.18 | 0.15 | 0.19 | 0.18 | 0.16 | 0.15 | 0.13 | 0.16 | 0.18 | 0.19 | 0.11 | 0.15 | 0.20 | 0.17 | 0.99 | 1.00 | 0.16 | 0.19 | 0.18 | 0.20 | 0.20 | 0.22 | 0.11 | 0.19 | 0.15 | 0.11 | 0.19 | 0.21 | 0.03 | 0.02 | 0.04 | 0.04 | 0.00 | 0.00 | 0.01 | 0.01 | 0.05 | 0.00 | 0.00 | 0.01 | 0.00 | 0.03 | 0.02 | 0.11 | 0.00 | 0.00 | 0.03 | 0.00 | 0.03 | 0.08 |
| MAUT1 | 0.24 | 0.15 | 0.13 | 0.15 | 0.16 | 0.12 | 0.24 | 0.13 | 0.31 | 0.21 | 0.12 | 0.16 | 0.11 | 0.11 | 0.13 | 0.14 | 0.12 | 0.12 | 0.15 | 0.12 | 0.15 | 0.11 | 0.09 | 0.10 | 0.11 | 0.18 | 0.16 | 0.16 | 1.00 | 0.29 | 0.64 | 0.14 | 0.14 | 0.20 | 0.12 | 0.14 | 0.08 | 0.06 | 0.17 | 0.13 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.02 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.04 | 0.00 | 0.04 | 0.05 | 0.00 | 0.01 | 0.07 |
| MAUT2 | 0.24 | 0.14 | 0.14 | 0.16 | 0.14 | 0.11 | 0.16 | 0.14 | 0.18 | 0.12 | 0.10 | 0.13 | 0.12 | 0.10 | 0.12 | 0.17 | 0.18 | 0.14 | 0.13 | 0.14 | 0.12 | 0.14 | 0.09 | 0.10 | 0.12 | 0.10 | 0.19 | 0.19 | 0.29 | 1.00 | 0.17 | 0.21 | 0.21 | 0.12 | 0.10 | 0.11 | 0.16 | 0.14 | 0.15 | 0.12 | 0.03 | 0.00 | 0.02 | 0.01 | 0.00 | 0.00 | 0.00 | 0.02 | 0.04 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.06 | 0.09 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 |
| MAUT0 | 0.28 | 0.18 | 0.12 | 0.15 | 0.20 | 0.13 | 0.31 | 0.16 | 0.39 | 0.28 | 0.10 | 0.18 | 0.13 | 0.09 | 0.13 | 0.15 | 0.13 | 0.11 | 0.12 | 0.12 | 0.17 | 0.14 | 0.08 | 0.12 | 0.12 | 0.21 | 0.18 | 0.18 | 0.64 | 0.17 | 1.00 | 0.21 | 0.21 | 0.24 | 0.12 | 0.15 | 0.10 | 0.08 | 0.20 | 0.18 | 0.05 | 0.00 | 0.02 | 0.02 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.07 | 0.00 | 0.03 | 0.04 | 0.00 | 0.03 | 0.08 |
| MZFONDS | 0.26 | 0.19 | 0.14 | 0.12 | 0.16 | 0.14 | 0.18 | 0.17 | 0.16 | 0.12 | 0.10 | 0.09 | 0.25 | 0.13 | 0.22 | 0.30 | 0.18 | 0.13 | 0.14 | 0.18 | 0.17 | 0.29 | 0.12 | 0.12 | 0.22 | 0.16 | 0.20 | 0.20 | 0.14 | 0.21 | 0.21 | 1.00 | 0.99 | 0.15 | 0.15 | 0.19 | 0.17 | 0.13 | 0.20 | 0.19 | 0.05 | 0.00 | 0.03 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.02 | 0.02 | 0.02 | 0.00 | 0.05 | 0.04 | 0.00 | 0.02 | 0.06 | 0.03 | 0.05 |
| MZPART | 0.26 | 0.19 | 0.14 | 0.12 | 0.16 | 0.14 | 0.18 | 0.17 | 0.16 | 0.12 | 0.10 | 0.09 | 0.24 | 0.13 | 0.22 | 0.30 | 0.18 | 0.13 | 0.14 | 0.18 | 0.17 | 0.29 | 0.12 | 0.12 | 0.22 | 0.16 | 0.20 | 0.20 | 0.14 | 0.21 | 0.21 | 0.99 | 1.00 | 0.15 | 0.14 | 0.19 | 0.17 | 0.13 | 0.21 | 0.19 | 0.05 | 0.00 | 0.02 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.02 | 0.02 | 0.01 | 0.00 | 0.05 | 0.04 | 0.00 | 0.02 | 0.06 | 0.03 | 0.05 |
| MINKM30 | 0.23 | 0.17 | 0.10 | 0.12 | 0.11 | 0.12 | 0.21 | 0.10 | 0.23 | 0.23 | 0.13 | 0.16 | 0.16 | 0.13 | 0.18 | 0.15 | 0.15 | 0.11 | 0.13 | 0.13 | 0.20 | 0.15 | 0.12 | 0.13 | 0.16 | 0.18 | 0.22 | 0.22 | 0.20 | 0.12 | 0.24 | 0.15 | 0.15 | 1.00 | 0.27 | 0.26 | 0.15 | 0.08 | 0.36 | 0.18 | 0.03 | 0.00 | 0.01 | 0.04 | 0.01 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.04 | 0.00 | 0.00 | 0.00 | 0.07 | 0.00 | 0.00 | 0.03 | 0.00 | 0.01 | 0.09 |
| MINK3045 | 0.21 | 0.13 | 0.08 | 0.10 | 0.12 | 0.09 | 0.12 | 0.09 | 0.12 | 0.13 | 0.14 | 0.13 | 0.17 | 0.16 | 0.17 | 0.13 | 0.14 | 0.11 | 0.15 | 0.13 | 0.14 | 0.17 | 0.12 | 0.14 | 0.17 | 0.14 | 0.11 | 0.11 | 0.12 | 0.10 | 0.12 | 0.15 | 0.14 | 0.27 | 1.00 | 0.26 | 0.15 | 0.13 | 0.21 | 0.12 | 0.02 | 0.00 | 0.03 | 0.03 | 0.00 | 0.01 | 0.02 | 0.02 | 0.03 | 0.02 | 0.03 | 0.01 | 0.00 | 0.00 | 0.00 | 0.02 | 0.03 | 0.03 | 0.02 | 0.00 | 0.00 | 0.00 |
| MINK4575 | 0.25 | 0.18 | 0.08 | 0.11 | 0.15 | 0.11 | 0.15 | 0.11 | 0.14 | 0.15 | 0.12 | 0.15 | 0.20 | 0.14 | 0.20 | 0.21 | 0.23 | 0.15 | 0.12 | 0.16 | 0.15 | 0.26 | 0.17 | 0.15 | 0.17 | 0.17 | 0.19 | 0.19 | 0.14 | 0.11 | 0.15 | 0.19 | 0.19 | 0.26 | 0.26 | 1.00 | 0.12 | 0.09 | 0.29 | 0.18 | 0.03 | 0.00 | 0.04 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.05 | 0.01 | 0.00 | 0.03 | 0.02 | 0.04 | 0.03 | 0.08 | 0.00 | 0.00 | 0.02 | 0.04 | 0.00 | 0.07 |
| MINK7512 | 0.16 | 0.10 | 0.14 | 0.08 | 0.08 | 0.04 | 0.10 | 0.09 | 0.09 | 0.10 | 0.08 | 0.09 | 0.21 | 0.09 | 0.14 | 0.18 | 0.19 | 0.09 | 0.10 | 0.14 | 0.13 | 0.18 | 0.11 | 0.12 | 0.14 | 0.11 | 0.15 | 0.15 | 0.08 | 0.16 | 0.10 | 0.17 | 0.17 | 0.15 | 0.15 | 0.12 | 1.00 | 0.14 | 0.33 | 0.13 | 0.06 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.04 | 0.04 | 0.00 | 0.00 | 0.00 | 0.10 | 0.06 |
| MINK123M | 0.14 | 0.08 | 0.13 | 0.08 | 0.09 | 0.09 | 0.07 | 0.07 | 0.07 | 0.08 | 0.09 | 0.08 | 0.20 | 0.07 | 0.11 | 0.17 | 0.17 | 0.06 | 0.10 | 0.12 | 0.11 | 0.16 | 0.16 | 0.09 | 0.11 | 0.11 | 0.11 | 0.11 | 0.06 | 0.14 | 0.08 | 0.13 | 0.13 | 0.08 | 0.13 | 0.09 | 0.14 | 1.00 | 0.37 | 0.09 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| MINKGEM | 0.26 | 0.20 | 0.13 | 0.12 | 0.10 | 0.11 | 0.19 | 0.08 | 0.23 | 0.20 | 0.13 | 0.13 | 0.24 | 0.13 | 0.21 | 0.22 | 0.20 | 0.09 | 0.14 | 0.14 | 0.17 | 0.22 | 0.12 | 0.13 | 0.18 | 0.16 | 0.19 | 0.19 | 0.17 | 0.15 | 0.20 | 0.20 | 0.21 | 0.36 | 0.21 | 0.29 | 0.33 | 0.37 | 1.00 | 0.22 | 0.02 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.01 | 0.00 | 0.02 | 0.00 | 0.06 | 0.00 | 0.00 | 0.04 | 0.00 | 0.02 | 0.11 |
| MKOOPKLA | 0.95 | 0.61 | 0.10 | 0.10 | 0.09 | 0.09 | 0.17 | 0.08 | 0.17 | 0.16 | 0.11 | 0.16 | 0.21 | 0.13 | 0.22 | 0.21 | 0.10 | 0.12 | 0.12 | 0.15 | 0.17 | 0.18 | 0.12 | 0.10 | 0.18 | 0.16 | 0.21 | 0.21 | 0.13 | 0.12 | 0.18 | 0.19 | 0.19 | 0.18 | 0.12 | 0.18 | 0.13 | 0.09 | 0.22 | 1.00 | 0.06 | 0.00 | 0.05 | 0.02 | 0.00 | 0.03 | 0.00 | 0.02 | 0.05 | 0.02 | 0.03 | 0.03 | 0.00 | 0.03 | 0.00 | 0.11 | 0.00 | 0.02 | 0.04 | 0.02 | 0.04 | 0.11 |
| PWAPART | 0.09 | 0.07 | 0.02 | 0.00 | 0.02 | 0.00 | 0.04 | 0.00 | 0.04 | 0.04 | 0.00 | 0.04 | 0.03 | 0.01 | 0.04 | 0.04 | 0.01 | 0.04 | 0.00 | 0.02 | 0.01 | 0.03 | 0.01 | 0.00 | 0.02 | 0.03 | 0.03 | 0.03 | 0.04 | 0.03 | 0.05 | 0.05 | 0.05 | 0.03 | 0.02 | 0.03 | 0.06 | 0.00 | 0.02 | 0.06 | 1.00 | 0.03 | 0.06 | 0.10 | 0.06 | 0.02 | 0.00 | 0.00 | 0.04 | 0.00 | 0.08 | 0.13 | 0.00 | 0.04 | 0.08 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.10 |
| PWABEDR | 0.02 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.06 | 0.00 | 0.00 | 0.02 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 1.00 | 0.02 | 0.03 | 0.25 | 0.00 | 0.27 | 0.14 | 0.06 | 0.33 | 0.00 | 0.10 | 0.00 | 0.00 | 0.16 | 0.13 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 |
| PWALAND | 0.10 | 0.11 | 0.01 | 0.04 | 0.02 | 0.03 | 0.02 | 0.00 | 0.04 | 0.01 | 0.00 | 0.03 | 0.02 | 0.00 | 0.03 | 0.01 | 0.00 | 0.12 | 0.02 | 0.00 | 0.00 | 0.02 | 0.00 | 0.02 | 0.00 | 0.00 | 0.04 | 0.04 | 0.00 | 0.02 | 0.02 | 0.03 | 0.02 | 0.01 | 0.03 | 0.04 | 0.00 | 0.00 | 0.00 | 0.05 | 0.06 | 0.02 | 1.00 | 0.04 | 0.01 | 0.00 | 0.00 | 0.06 | 0.34 | 0.11 | 0.00 | 0.00 | 0.05 | 0.00 | 0.04 | 0.26 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 |
| PPERSAUT | 0.04 | 0.05 | 0.00 | 0.06 | 0.01 | 0.01 | 0.02 | 0.00 | 0.02 | 0.01 | 0.01 | 0.03 | 0.00 | 0.03 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.03 | 0.01 | 0.02 | 0.00 | 0.01 | 0.02 | 0.00 | 0.04 | 0.04 | 0.00 | 0.01 | 0.02 | 0.02 | 0.02 | 0.04 | 0.03 | 0.02 | 0.00 | 0.00 | 0.02 | 0.02 | 0.10 | 0.03 | 0.04 | 1.00 | 0.12 | 0.04 | 0.09 | 0.08 | 0.04 | 0.08 | 0.08 | 0.26 | 0.00 | 0.02 | 0.00 | 0.13 | 0.00 | 0.07 | 0.03 | 0.09 | 0.06 | 0.18 |
| PBESAUT | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.05 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.06 | 0.25 | 0.01 | 0.12 | 1.00 | 0.00 | 0.17 | 0.17 | 0.04 | 0.24 | 0.00 | 0.10 | 0.00 | 0.00 | 0.03 | 0.09 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| PMOTSCO | 0.05 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.02 | 0.00 | 0.00 | 0.04 | 0.00 | 1.00 | 0.00 | 0.07 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.06 |
| PVRAAUT | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.27 | 0.00 | 0.09 | 0.17 | 0.00 | 1.00 | 0.22 | 0.08 | 0.12 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.04 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 |
| PAANHANG | 0.07 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.06 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.01 | 0.00 | 0.02 | 0.02 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.03 | 0.00 | 0.00 | 0.02 | 0.00 | 0.14 | 0.06 | 0.08 | 0.17 | 0.07 | 0.22 | 1.00 | 0.06 | 0.15 | 0.00 | 0.00 | 0.00 | 0.01 | 0.11 | 0.05 | 0.07 | 0.08 | 0.00 | 0.04 | 0.00 | 0.00 |
| PTRACTOR | 0.12 | 0.13 | 0.00 | 0.02 | 0.04 | 0.01 | 0.02 | 0.00 | 0.00 | 0.02 | 0.00 | 0.03 | 0.02 | 0.02 | 0.03 | 0.01 | 0.01 | 0.14 | 0.03 | 0.01 | 0.02 | 0.02 | 0.00 | 0.06 | 0.02 | 0.00 | 0.05 | 0.05 | 0.02 | 0.04 | 0.00 | 0.02 | 0.02 | 0.02 | 0.03 | 0.05 | 0.00 | 0.00 | 0.02 | 0.05 | 0.04 | 0.06 | 0.34 | 0.04 | 0.04 | 0.00 | 0.08 | 0.06 | 1.00 | 0.20 | 0.00 | 0.00 | 0.07 | 0.03 | 0.05 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 |
| PWERKT | 0.08 | 0.07 | 0.00 | 0.03 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.13 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.01 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.33 | 0.11 | 0.08 | 0.24 | 0.00 | 0.12 | 0.15 | 0.20 | 1.00 | 0.00 | 0.17 | 0.13 | 0.00 | 0.08 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| PBROM | 0.08 | 0.04 | 0.01 | 0.02 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.03 | 0.02 | 0.01 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.01 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.08 | 0.00 | 0.00 | 0.08 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.08 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 |
| PLEVEN | 0.00 | 0.04 | 0.00 | 0.00 | 0.03 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.03 | 0.02 | 0.03 | 0.02 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.00 | 0.01 | 0.00 | 0.02 | 0.02 | 0.04 | 0.01 | 0.03 | 0.00 | 0.00 | 0.01 | 0.03 | 0.13 | 0.10 | 0.00 | 0.26 | 0.10 | 0.00 | 0.00 | 0.00 | 0.00 | 0.17 | 0.00 | 1.00 | 0.03 | 0.13 | 0.00 | 0.13 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.02 |
| PPERSONG | 0.00 | 0.02 | 0.00 | 0.01 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.02 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.05 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.07 | 0.13 | 0.00 | 0.03 | 1.00 | 0.00 | 0.00 | 0.04 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 |
| PGEZONG | 0.02 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.02 | 0.01 | 0.05 | 0.02 | 0.00 | 0.04 | 0.04 | 0.03 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.01 | 0.04 | 0.00 | 0.02 | 0.00 | 0.03 | 0.03 | 0.02 | 0.00 | 0.00 | 0.02 | 0.01 | 0.00 | 0.00 | 0.04 | 0.02 | 0.00 | 0.02 | 0.03 | 0.04 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.01 | 0.03 | 0.00 | 0.00 | 0.13 | 0.00 | 1.00 | 0.00 | 0.04 | 0.00 | 0.00 | 0.02 | 0.01 | 0.11 | 0.05 |
| PWAOREG | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.02 | 0.00 | 0.25 | 0.00 | 0.00 | 0.02 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.08 | 0.16 | 0.04 | 0.00 | 0.03 | 0.00 | 0.04 | 0.11 | 0.05 | 0.08 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | 0.07 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 |
| PBRAND | 0.17 | 0.16 | 0.01 | 0.06 | 0.03 | 0.05 | 0.07 | 0.03 | 0.06 | 0.07 | 0.03 | 0.07 | 0.03 | 0.03 | 0.04 | 0.04 | 0.08 | 0.09 | 0.04 | 0.03 | 0.03 | 0.05 | 0.03 | 0.03 | 0.03 | 0.04 | 0.11 | 0.11 | 0.04 | 0.06 | 0.07 | 0.05 | 0.05 | 0.07 | 0.02 | 0.08 | 0.04 | 0.03 | 0.06 | 0.11 | 0.36 | 0.13 | 0.26 | 0.13 | 0.09 | 0.02 | 0.04 | 0.05 | 0.25 | 0.22 | 0.08 | 0.13 | 0.04 | 0.04 | 0.07 | 1.00 | 0.00 | 0.00 | 0.04 | 0.05 | 0.03 | 0.15 |
| PZEILPL | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.03 | 0.07 | 0.00 | 0.00 | 0.00 | 0.05 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.09 | 0.00 | 0.04 | 0.04 | 0.00 | 0.03 | 0.00 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.07 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | 0.29 | 0.00 | 0.11 | 0.00 | 0.03 |
| PPLEZIER | 0.02 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.04 | 0.00 | 0.00 | 0.04 | 0.03 | 0.03 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.00 | 0.01 | 0.07 | 0.00 | 0.00 | 0.00 | 0.08 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.00 | 0.29 | 1.00 | 0.00 | 0.05 | 0.00 | 0.11 |
| PFIETS | 0.05 | 0.03 | 0.02 | 0.00 | 0.02 | 0.04 | 0.04 | 0.02 | 0.06 | 0.00 | 0.02 | 0.03 | 0.06 | 0.04 | 0.04 | 0.05 | 0.02 | 0.04 | 0.02 | 0.04 | 0.06 | 0.04 | 0.04 | 0.00 | 0.01 | 0.00 | 0.03 | 0.03 | 0.05 | 0.00 | 0.04 | 0.02 | 0.02 | 0.03 | 0.02 | 0.02 | 0.00 | 0.00 | 0.04 | 0.04 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.04 | 0.00 | 0.00 | 1.00 | 0.00 | 0.01 | 0.02 |
| PINBOED | 0.03 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.00 | 0.02 | 0.00 | 0.00 | 0.02 | 0.03 | 0.00 | 0.03 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.05 | 0.03 | 0.00 | 0.02 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.06 | 0.06 | 0.00 | 0.00 | 0.04 | 0.00 | 0.00 | 0.00 | 0.02 | 0.00 | 0.03 | 0.00 | 0.09 | 0.00 | 0.00 | 0.00 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.01 | 0.00 | 0.05 | 0.11 | 0.05 | 0.00 | 1.00 | 0.01 | 0.00 |
| PBYSTAND | 0.04 | 0.05 | 0.00 | 0.02 | 0.00 | 0.01 | 0.02 | 0.00 | 0.00 | 0.02 | 0.03 | 0.01 | 0.03 | 0.03 | 0.02 | 0.00 | 0.03 | 0.00 | 0.02 | 0.00 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.03 | 0.03 | 0.01 | 0.00 | 0.03 | 0.03 | 0.03 | 0.01 | 0.00 | 0.00 | 0.10 | 0.00 | 0.02 | 0.04 | 0.02 | 0.00 | 0.00 | 0.06 | 0.00 | 0.02 | 0.00 | 0.00 | 0.01 | 0.00 | 0.00 | 0.04 | 0.00 | 0.11 | 0.00 | 0.03 | 0.00 | 0.00 | 0.01 | 0.01 | 1.00 | 0.07 |
| CARAVAN | 0.12 | 0.12 | 0.00 | 0.05 | 0.03 | 0.04 | 0.06 | 0.01 | 0.06 | 0.04 | 0.00 | 0.02 | 0.08 | 0.04 | 0.09 | 0.07 | 0.00 | 0.04 | 0.05 | 0.06 | 0.05 | 0.09 | 0.05 | 0.00 | 0.07 | 0.06 | 0.08 | 0.08 | 0.07 | 0.00 | 0.08 | 0.05 | 0.05 | 0.09 | 0.00 | 0.07 | 0.06 | 0.00 | 0.11 | 0.11 | 0.10 | 0.00 | 0.00 | 0.18 | 0.00 | 0.06 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | 0.02 | 0.00 | 0.05 | 0.03 | 0.15 | 0.03 | 0.11 | 0.02 | 0.00 | 0.07 | 1.00 |
#Heatmap of correlation matrix
# Mask the upper triangle (diagonal included) so each pair is drawn only once.
# The built-in bool is used because the np.bool alias was removed in NumPy 1.24.
mask = np.zeros_like(df_vcramer, dtype = bool)
mask[np.triu_indices_from(mask)] = True
with sns.axes_style("white"):
    fig, axx = plt.subplots(figsize=(15,15))
    ax = sns.heatmap(df_vcramer, mask = mask, vmin = 0, vmax = 1, square = True, ax = axx)
plt.show()
def plot_feature(df, col_name, isContinuous, target):
    """
    Visualize a variable on its own and broken down by the target variable.
    - df dataframe
    - col_name is the variable name in the dataframe
    - isContinuous is True if the variable is continuous, False otherwise
    - target is the name of the target column in the dataframe
    """
    _, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12,3), dpi=90)
    count_null = df[col_name].isnull().sum()

    # Left panel: distribution of the variable itself.
    if isContinuous:
        sns.histplot(df.loc[df[col_name].notnull(), col_name], kde=False, ax=ax1)
    else:
        # Pass the series as x= explicitly; newer seaborn releases treat the
        # first positional argument as `data`, not as the variable to count.
        sns.countplot(x=df[col_name], order=sorted(df[col_name].unique()), color='#5975A4', saturation=1, ax=ax1)
    ax1.set_xlabel(col_name)
    ax1.set_ylabel('Count')
    ax1.set_title(col_name+ ' Number of nulls: '+str(count_null))
    # Rotate through the Axes itself: plt.xticks acts on the current pyplot
    # axes, which is not necessarily this panel.
    ax1.tick_params(axis='x', rotation=90)

    # Right panel: the variable against the target.
    if isContinuous:
        sns.boxplot(x=col_name, y=target, data=df, ax=ax2)
        ax2.set_ylabel('')
        ax2.set_title(col_name + ' by '+target)
    else:
        # Share of each target value within every level of the variable.
        data = df.groupby(col_name)[target].value_counts(normalize=True).to_frame('proportion').reset_index()
        # Use the col_name parameter here, not a loop variable from the calling scope.
        data.columns = [col_name, target, 'proportion']
        sns.barplot(x = col_name, y = 'proportion', hue= target, data = data, saturation=1, ax=ax2)
        ax2.set_ylabel(target+' fraction')
        ax2.set_title(target)
    ax2.tick_params(axis='x', rotation=90)
    ax2.set_xlabel(col_name)
    plt.tight_layout()
# Draw the two-panel summary for every column except the target itself.
for i in df_ticdata.columns:
    if i == 'CARAVAN':
        continue
    # Float columns are plotted as continuous, everything else as categorical.
    plot_feature(df_ticdata, col_name=i, isContinuous=(df_ticdata[i].dtype == float), target='CARAVAN')
The most important detail in this case is that some variables represent the same information, with the peculiarity that some of them are categorical and others are numerical.